# Node ID 0b5d815d078ba95a6d4c075fe2e113460ff5a395 # Parent 725fef71f3767b4b8899584db71147e9920b18f3 diff --git a/configs/example/synchrotrace_classic_memory.py b/configs/example/synchrotrace_classic_memory.py new file mode 100755 --- /dev/null +++ b/configs/example/synchrotrace_classic_memory.py @@ -0,0 +1,249 @@ +# Copyright (c) 2015-2016 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# Copyright (c) 2015, Drexel University +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#
+# Authors: Karthik Sangaiah
+#          Ankit More
+#          Radhika Jagtap
+#
+# Instantiate SynchroTrace with classic memory
+#
+
+import m5
+from m5.objects import *
+from m5.defines import buildEnv
+from m5.util import addToPath, fatal
+from m5.util.convert import toMemorySize
+from math import floor, ceil
+import os, optparse, sys
+addToPath('../common')
+addToPath('../topologies')
+
+import Options
+import Simulation
+import MemConfig
+from Caches import *
+
+def config_caches(options, system):
+    """Build the classic-memory cache hierarchy and attach it to system.
+
+    Creates one private L1 data cache per CPU, one L2 cache and crossbar
+    per cluster of options.cpus_per_cluster CPUs and, when options.l3cache
+    is set, a single L3 cache and crossbar in front of system.membus.
+
+    The caches are attached to the system as l1caches, l2caches, toL2bus
+    and, if requested, l3cache and toL3bus.
+
+    Reads the module-level cluster_clk_domain, so it must only be called
+    after that clock domain has been created. system.tester and
+    system.membus must already exist as well.
+
+    Calls fatal() when cpus_per_cluster is missing or not positive, or
+    when num_cpus is not a multiple of cpus_per_cluster.
+    """
+    # Create a reasonable L3 Cache class for 4 to 8 cores
+    class L3Cache(L2Cache):
+        size = '8192kB'
+        assoc = 16
+        hit_latency = 20
+        response_latency = 20
+        mshrs = 20
+        tgts_per_mshr = 12
+        write_buffers = 8
+
+    # Reject a missing or non-positive cluster size up front; otherwise the
+    # modulo below fails with a TypeError or ZeroDivisionError
+    if not options.cpus_per_cluster or options.cpus_per_cluster < 1:
+        fatal("cpus_per_cluster must be a positive integer, got %s" %
+              options.cpus_per_cluster)
+
+    # Number of clusters
+    if options.num_cpus % options.cpus_per_cluster:
+        fatal("num_cpus %s is not exactly divisible by cpus_per_cluster %s" %
+              (options.num_cpus, options.cpus_per_cluster))
+    num_clusters = options.num_cpus // options.cpus_per_cluster
+
+    # First create a list, instantiate caches and later connect them
+    l1_caches = []
+    l2_caches = []
+    l2_crossbar = []
+
+    # Instantiate L3 cache and crossbar if user sets the option
+    # It is shared across all clusters so connect the downstream port to the
+    # memory bus
+    if options.l3cache:
+        l3_cache = L3Cache()
+        l3_crossbar = L2XBar(width = 64)
+        l3_cache.cpu_side = l3_crossbar.master
+        l3_cache.mem_side = system.membus.slave
+
+    # Setup an L2 per cluster shared by the cores in the cluster
+    for i in range(num_clusters):
+        l2_crossbar.append(L2XBar(width = 64,
+                           snoop_filter = SnoopFilter(max_capacity = '16MB')))
+        l2_crossbar[i].clk_domain = cluster_clk_domain
+        l2_caches.append(L2Cache(size = '1024kB'))
+        l2_caches[i].clk_domain = cluster_clk_domain
+        l2_caches[i].cpu_side = l2_crossbar[i].master
+        if not options.l3cache:
+            l2_caches[i].mem_side = system.membus.slave
+        else:
+            # If there is an L3 in the system connect to it
+            l2_caches[i].mem_side = l3_crossbar.slave
+
+    # One private L1 per CPU, hanging off the crossbar of the CPU's cluster
+    for cpu_id in range(options.num_cpus):
+        cluster_id = cpu_id // options.cpus_per_cluster
+        l1_caches.append(L1_DCache(size = '32kB'))
+        l1_caches[cpu_id].clk_domain = cluster_clk_domain
+        l1_caches[cpu_id].mem_side = l2_crossbar[cluster_id].slave
+        l1_caches[cpu_id].cpu_side = system.tester.cpu_port[cpu_id]
+
+    # Attach all instantiated list of caches as a child to the system
+    system.toL2bus = l2_crossbar
+    system.l2caches = l2_caches
+    system.l1caches = l1_caches
+    if options.l3cache:
+        system.l3cache = l3_cache
+        system.toL3bus = l3_crossbar
+
+# Get relevant paths
+config_path = os.path.dirname(os.path.abspath(__file__))
+config_root = os.path.dirname(config_path)
+
+# Add gem5 options
+parser = optparse.OptionParser()
+Options.addCommonOptions(parser)
+
+# Add SynchroTrace specific options
+# Mandatory to set the path to the traces directory
+parser.add_option("--event-dir", type = "string", default = "",
+                  help = "path to the directory containing event traces")
+parser.add_option("--output-dir", type = "string", default = "",
+                  help = "path to the directory where to dump the output")
+
+# To set the number of threads (must equal the number of threads traced)
+# In the intuitive case, the --num-cpus equals the number of threads
+parser.add_option("--num-threads", type = "int", help = "Number of threads")
+
+# Number of cpus per cluster that share L2 cache
+parser.add_option("--cpus-per-cluster", type = "int",
+                  help = "Number of CPUs per Cluster")
+
+# Other synchrotrace behaviour options that have defaults
+parser.add_option("--master-freq", type = "int", default = 1000,
+                  help = "Frequency at which to wake up master event")
+parser.add_option("--cpi-iops", type = "float", default = 1,
+                  help = "CPI for integer ops")
+parser.add_option("--cpi-flops", type = "float", default = 1,
+                  help = "CPI for floating point ops")
+parser.add_option("--pc-skip", action = "store_true", default = False,
+                  help = "Don't enforce producer->consumer dependencies")
+
+# Memory-system configuration options
+parser.add_option("--l3cache", action = "store_true", default = False)
+parser.add_option("--membus-width", action = "store", type = "int",
+                  default = 64, help = "Width of System XBar")
+
+# Clocks and maxtick
+parser.add_option("--cluster-clock", action = "store", type = "string",
+                  default = '1.7GHz',
+                  help = "Clock for blocks running in the cluster")
+parser.add_option("--memsys-clock", action = "store", type = "string",
+                  default = '1.6GHz', help = "Clock for Memory System")
+
+(options, args) = parser.parse_args()
+
+if args:
+    print("Error: script doesn't take any positional arguments")
+    sys.exit(1)
+
+# These two options have no default; stop with a usage message here rather
+# than failing later on a None value
+for required in ("num_threads", "cpus_per_cluster"):
+    if getattr(options, required) is None:
+        parser.error("--%s must be specified" % required.replace("_", "-"))
+
+# Create the system
+system = System(cache_line_size = options.cacheline_size,
+                membus = SystemXBar(width = options.membus_width),
+                mem_ranges = AddrRange(options.mem_size))
+MemConfig.config_mem(options, system)
+
+# Create voltage and clock domains
+system.voltage_domain = VoltageDomain(voltage = '1V')
+
+system.clk_domain = SrcClockDomain(clock = options.sys_clock,
+                                   voltage_domain = system.voltage_domain)
+
+cluster_clk_domain = SrcClockDomain(clock = options.cluster_clock,
+                                    voltage_domain = system.voltage_domain)
+
+memsys_clk_domain = SrcClockDomain(clock = options.memsys_clock,
+                                   voltage_domain = system.voltage_domain)
+
+# Create the SynchroTrace Replay Mechanism
+system.tester = SynchroTrace(num_cpus = options.num_cpus,
+                             num_threads = options.num_threads,
+                             event_dir = options.event_dir,
+                             output_dir = options.output_dir,
+                             master_wakeup_freq = options.master_freq,
+                             cpi_iops = options.cpi_iops,
+                             cpi_flops = options.cpi_flops,
+                             ruby = options.ruby,
+                             block_size_bytes = options.cacheline_size,
+                             mem_size_bytes = toMemorySize(options.mem_size),
+                             pc_skip = options.pc_skip)
+
+# Set memory and cluster clock domains
+system.membus.clk_domain = memsys_clk_domain
+
+for mem_ctrl in system.mem_ctrls:
+    mem_ctrl.clk_domain = memsys_clk_domain
+
+system.tester.clk_domain = cluster_clk_domain
+
+# Create the cache hierarchy and busses
+config_caches(options, system)
+
+# The system port is never used in the tester so merely connect it
+# to avoid problems
+system.system_port = system.membus.slave
+
+# Setup simulation
+root = Root(full_system = False, system = system)
+root.system.mem_mode = 'timing'
+
+# Instantiate configuration
+m5.instantiate()
+
+# Simulate until program terminates
+exit_event = m5.simulate(m5.MaxTick)
+
+print('Exiting @ tick %s because %s' % (m5.curTick(), exit_event.getCause()))
diff --git a/src/cpu/testers/synchrotrace/SConscript b/src/cpu/testers/synchrotrace/SConscript
new file mode 100644
--- /dev/null
+++ b/src/cpu/testers/synchrotrace/SConscript
@@ -0,0 +1,54 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2015-2016 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#
+# Authors: Karthik Sangaiah
+
+Import('*')
+
+# Nothing in this directory is registered when building for the 'null' ISA
+if env['TARGET_ISA'] != 'null':
+    # Python-side declaration of the SynchroTrace SimObject
+    SimObject('SynchroTrace.py')
+
+    # C++ sources of the trace parser and the replay model
+    for source_file in ('st_parser.cc', 'synchro_trace.cc'):
+        Source(source_file)
+
+    # Debug flags declared for this directory
+    for flag_name in ('ROI', 'STDebug', 'STEventPrint', 'STIntervalPrint',
+                      'STMutexLogger'):
+        DebugFlag(flag_name)
diff --git a/src/cpu/testers/synchrotrace/SynchroTrace.py b/src/cpu/testers/synchrotrace/SynchroTrace.py
new file mode 100644
--- /dev/null
+++ b/src/cpu/testers/synchrotrace/SynchroTrace.py
@@ -0,0 +1,103 @@
+# Copyright (c) 2015-2016 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# Copyright (c) 2015, Drexel University All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Karthik Sangaiah +# Ankit More +# Radhika Jagtap +# +# SynchroTrace is the trace replay module that plays back traces generated by +# a tool called Sigil [1]. The traces are per-thread and record not only +# compute operations and memory accesses local to the thread but also inter- +# thread communication and synchronisation primitives like locks and barriers. +# The information in the traces is architecture-agnostic. 
The traces are then
+# replayed by the SynchroTrace model in gem5 to achieve light-weight multicore
+# simulation [2].
+#
+# Sigil, the tool used to generate the traces is not included in gem5 but
+# lives in a public repo on github [3] and has user documentation [4].
+#
+# References:
+#
+# [1] "Platform-independent analysis of function-level communication in
+# workloads", Siddharth Nilakantan and Mark Hempstead, IISWC 2013.
+#
+# [2] "Synchrotrace: synchronization-aware architecture-agnostic traces for
+# light-weight multicore simulation", Siddharth Nilakantan, Karthik Sangaiah,
+# Ankit More and Giordano Salvadory, ISPASS 2015.
+#
+# [3] https://github.com/mikelui/Sigil2
+#
+# [4] http://sigil2.readthedocs.io/en/latest/about.html
+#
+
+from MemObject import MemObject
+from m5.params import *
+from m5.proxy import *
+
+class SynchroTrace(MemObject):
+    """Replay model for multi-threaded traces generated by Sigil.
+
+    The model can be connected either to the Classic Memory System or to
+    Ruby; the 'ruby' parameter records which of the two is in use.
+    """
+    type = 'SynchroTrace'
+    cxx_header = "cpu/testers/synchrotrace/synchro_trace.hh"
+
+    # Ports towards the memory system and the owning system
+    cpu_port = VectorMasterPort("Cpu ports")
+    system = Param.System(Parent.any, "System we belong to")
+
+    # Size of the replayed workload
+    num_cpus = Param.Int("Number of cpus / Memory Ports")
+    num_threads = Param.Int("Number of threads")
+
+    # Trace input and result output locations
+    event_dir = Param.String("Location of the events profile")
+    output_dir = Param.String("Directory path to dump the output")
+
+    # Replay timing knobs
+    master_wakeup_freq = Param.Int(1, "How often to wakeup the master event")
+    cpi_iops = Param.Float(1, "CPI for integer ops")
+    cpi_flops = Param.Float(2, "CPI for floating point ops")
+    pc_skip = Param.Bool("Skip P->C dependencies")
+
+    # Description of the memory system being driven
+    ruby = Param.Bool(False, "Are we using Ruby?")
+    block_size_bytes = Param.Int(64, "Cache Line Size")
+    mem_size_bytes = Param.UInt64("Memory Size")
diff --git a/src/cpu/testers/synchrotrace/st_event.hh b/src/cpu/testers/synchrotrace/st_event.hh
new file mode 100644
--- /dev/null
+++ b/src/cpu/testers/synchrotrace/st_event.hh
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2015-2016 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Copyright (c) 2015, Drexel University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Karthik Sangaiah
+ *          Ankit More
+ *          Radhika Jagtap
+ *
+ * Defines an event in the queue for synchronization and event-based
+ * dependency tracking
+ */
+
+#ifndef __CPU_TESTERS_SYNCHROTRACE_STEVENT_HH__
+#define __CPU_TESTERS_SYNCHROTRACE_STEVENT_HH__
+
+#include <deque>
+#include <ostream>
+#include <vector>
+
+#include "sim/system.hh"
+
+/**
+ * This struct contains the memory access information needed to create a
+ * request.
+ */
+struct MemAddrInfo {
+
+    /** Thread ID corresponding to the trace used to generate this request */
+    ThreadID reqThreadID;
+
+    /** Event ID for the sub event that this request is linked to */
+    unsigned long reqEventID;
+
+    /** Physical address */
+    Addr addr;
+
+    /** Size of the request in bytes */
+    unsigned int numBytes;
+
+    /**
+     * Default constructor. The IDs get the same -1 "unset" value used by
+     * the (addr, num_bytes) constructor; address and size start at zero so
+     * that no member is left indeterminate.
+     */
+    MemAddrInfo()
+      : reqThreadID(-1),
+        reqEventID(-1),
+        addr(0),
+        numBytes(0)
+    { }
+
+    /** Construct a fully specified request description */
+    MemAddrInfo(ThreadID thread_id, unsigned long event_id,
+                Addr addr, unsigned int num_bytes)
+      : reqThreadID(thread_id),
+        reqEventID(event_id),
+        addr(addr),
+        numBytes(num_bytes)
+    { }
+
+    /** Construct a request description with thread and event IDs unset */
+    MemAddrInfo(Addr addr, unsigned int num_bytes)
+      : reqThreadID(-1),
+        reqEventID(-1),
+        addr(addr),
+        numBytes(num_bytes)
+    { }
+};
+
+/**
+ * This struct contains information needed to calculate the timing, e.g.
+ * number of integer or floating point operations, and a pointer to the
+ * MemAddrInfo.
+ */
+struct SubEvent {
+
+    /**
+     * Memory request type for a sub event
+     *
+     * REQ_READ     A read type request
+     * REQ_WRITE    A write type request
+     */
+    enum EventReqType {
+        REQ_READ,
+        REQ_WRITE
+    };
+
+    /** The number of integer ops that this sub event is broken down into */
+    uint64_t numIOPS;
+
+    /**
+     * The number of floating point ops that this sub event is broken down
+     * into.
+     */
+    uint64_t numFLOPS;
+
+    /** Whether the request is read or write */
+    EventReqType msgType;
+
+    /** The time at which to send the request */
+    Tick triggerTime;
+
+    /** Set to true when request is sent */
+    bool msgTriggered;
+
+    /** Whether the event has a request associated with it */
+    bool containsMsg;
+
+    /**
+     * Pointer to the struct holding request related information. This
+     * struct never deletes it.
+     */
+    MemAddrInfo *thisMsg;
+
+    /** Default constructor: no operations, no request, nothing sent */
+    SubEvent()
+      : numIOPS(0),
+        numFLOPS(0),
+        msgType(REQ_READ),
+        triggerTime(0),
+        msgTriggered(false),
+        containsMsg(false),
+        thisMsg(nullptr)
+    { }
+
+    /**
+     * Construct a sub event that carries a memory request. The trigger
+     * time is not a parameter and starts at zero.
+     */
+    SubEvent(unsigned long i_ops, unsigned long f_ops,
+             EventReqType req_type, bool req_sent, bool contains_req,
+             MemAddrInfo *req_info_ptr)
+      : numIOPS(i_ops),
+        numFLOPS(f_ops),
+        msgType(req_type),
+        triggerTime(0),
+        msgTriggered(req_sent),
+        containsMsg(contains_req),
+        thisMsg(req_info_ptr)
+    { }
+
+    /**
+     * Construct a sub event without request information. The members this
+     * overload does not take are given defined values (read type, zero
+     * trigger time, null request pointer) instead of being left
+     * indeterminate.
+     */
+    SubEvent(unsigned long i_ops, unsigned long f_ops,
+             bool req_sent, bool contains_req)
+      : numIOPS(i_ops),
+        numFLOPS(f_ops),
+        msgType(REQ_READ),
+        triggerTime(0),
+        msgTriggered(req_sent),
+        containsMsg(contains_req),
+        thisMsg(nullptr)
+    { }
+};
+
+/**
+ * STEvent encapsulates a record in the trace as an event during trace replay
+ */
+class STEvent
+{
+  public:
+
+    /**
+     * Events have a broad classification which is captured in EventClass
+     *
+     * EVENT_CLASS_NONE     Initialisation value
+     *
+     * COMP                 Computation event, that is a local read, local
+     *                      write or an integer or floating point operation
+     *
+     * COMM                 An event for communication between threads
+     *
+     * THREAD_API           Calls to thread library such as thread creation,
+     *                      join, barriers and locks
+     */
+    enum EventClass {
+        EVENT_CLASS_NONE,
+        COMP,
+        COMM,
+        THREAD_API
+    };
+
+    /**
+     * The type within the THREAD_API class of events
+     *
+     * INVALID_EVENT    Initialization value
+     *
+     * MUTEX_LOCK       Mutex lock event simulating lock acquire
+     *
+     * MUTEX_UNLOCK     Mutex unlock event simulating lock release
+     *
+     * THREAD_CREATE    New thread creation event
+     *
+     * THREAD_JOIN      Thread join
+     *
+     * BARRIER_WAIT     Synchronisation barrier
+     *
+     * COND_WAIT        Pthread condition wait
+     *
+     * COND_SG          Pthread condition signal
+     *
+     * SPIN_LOCK        Pthread spin lock
+     *
+     * SPIN_UNLOCK      Pthread spin unlock
+     *
+     * SEM_INIT         Initialise a semaphore
+     *
+     * SEM_WAIT         Block on a semaphore count
+     *
+     * SEM_POST         Increment a semaphore
+     *
+     * SEM_GETV         Presumably "get semaphore value" - TODO confirm
+     *
+     * SEM_DEST         Presumably "destroy a semaphore" - TODO confirm
+     *
+     */
+    enum EventType {
+        INVALID_EVENT = 0,
+        MUTEX_LOCK = 1,
+        MUTEX_UNLOCK = 2,
+        THREAD_CREATE = 3,
+        THREAD_JOIN = 4,
+        BARRIER_WAIT = 5,
+        COND_WAIT = 6,
+        COND_SG = 7,
+        SPIN_LOCK = 8,
+        SPIN_UNLOCK = 9,
+        SEM_INIT = 10,
+        SEM_WAIT = 11,
+        SEM_POST = 12,
+        SEM_GETV = 13,
+        SEM_DEST = 14
+    };
+
+    /**
+     * Typedef for the memory address information vector. The elements are
+     * pointers owned by the event: the destructor deletes each of them.
+     */
+    typedef std::vector<MemAddrInfo *> vectorMemAddr;
+
+    /**
+     * The class of event to specify if the event captures local compute,
+     * inter-thread communication or thread API calls
+     */
+    EventClass eventClass;
+
+    /** To specify the type of thread API call, for e.g. mutex lock */
+    EventType eventType;
+
+    /** Unique ID of the event */
+    unsigned long eventID;
+
+    /** Thread ID corresponding to the event */
+    ThreadID evThreadID;
+
+    /**
+     * Addresses which are written by some other thread and read by this thread
+     */
+    vectorMemAddr commPreRequisiteEvents;
+
+    /** Addresses that this event writes which are private to its thread */
+    vectorMemAddr compWriteEvents;
+
+    /** Addresses that this event reads which are private to its thread */
+    vectorMemAddr compReadEvents;
+
+    /** Number of integer operations */
+    unsigned long compIOPS;
+
+    /** Number of floating point operations */
+    unsigned long compFLOPS;
+
+    /** Number of reads to private memory ie local to event's thread */
+    unsigned long compMemReads;
+
+    /** Number of writes to private memory, ie local to event's thread */
+    unsigned long compMemWrites;
+
+    /** Set to true when sub events are created to handle each operation */
+    bool subEventsCreated;
+
+    /**
+     * Address of the critical variable used in Pthread calls, for e.g. the
+     * mutex lock address, barrier variable address or address of the input
+     * variable that holds the thread information when creating a new thread
+     */
+    Addr pthAddr;
+
+    /**
+     * A queue of sub events which hold operations that are bundled up in
+     * STEvent. Null until a list is attached; deleted by the destructor.
+     *
+     * NOTE(review): the element type is reconstructed as SubEvent held by
+     * value - confirm against the code that fills this list.
+     */
+    std::deque<SubEvent> *subEventList;
+
+    /**
+     * Constructor. Every member gets a defined value. In particular
+     * subEventList starts out null, because the destructor deletes it
+     * unconditionally and an indeterminate pointer there would be
+     * undefined behaviour.
+     */
+    STEvent()
+      : eventClass(EVENT_CLASS_NONE),
+        eventType(INVALID_EVENT),
+        eventID(-1),
+        evThreadID(-1),
+        compIOPS(-1),
+        compFLOPS(-1),
+        compMemReads(-1),
+        compMemWrites(-1),
+        subEventsCreated(false),
+        pthAddr(0),
+        subEventList(nullptr)
+    { }
+
+    /**
+     * Destructor. Releases the sub event list and every MemAddrInfo held
+     * in the three address vectors.
+     *
+     * NOTE(review): the class is still implicitly copyable, so copying an
+     * STEvent would lead to these pointers being deleted twice - confirm
+     * that events are only ever handled through pointers or references.
+     */
+    ~STEvent() {
+        // Deleting a null pointer is a no-op, so no guard is required
+        delete subEventList;
+
+        for (auto& event : commPreRequisiteEvents)
+            delete event;
+        for (auto& event : compWriteEvents)
+            delete event;
+        for (auto& event : compReadEvents)
+            delete event;
+    }
+};
+
+/**
+ * Print the class, IDs and sub-event creation status of an event; the
+ * operation and memory access counts are added for COMP events only.
+ */
+inline std::ostream &operator <<(std::ostream &os,
+                                 const STEvent &event)
+{
+    os << "Class:" << event.eventClass;
+    os << " EventID:" << event.eventID;
+    os << " ThreadID:" << event.evThreadID;
+
+    if (event.eventClass == STEvent::COMP){
+        os << " compIOPS:" << event.compIOPS;
+        os << " compFLOPS:" << event.compFLOPS;
+        os << " compMemReads:" << event.compMemReads;
+        os << " compMemWrites:" << event.compMemWrites;
+    }
+
+    os << " Sub-events creation status:" << event.subEventsCreated;
+
+    return os;
+}
+
+/** Print all four fields of a memory request description */
+inline std::ostream &operator <<(std::ostream &os,
+                                 const MemAddrInfo &mem_addr_info)
+{
+    os << " reqThreadID:" << mem_addr_info.reqThreadID;
+    os << " reqEventID:" << mem_addr_info.reqEventID;
+    os << " addr:" << mem_addr_info.addr;
+    os << " numBytes:" << mem_addr_info.numBytes;
+
+    return os;
+}
+#endif // __CPU_TESTERS_SYNCHROTRACE_STEVENT_HH__
diff --git a/src/cpu/testers/synchrotrace/st_parser.hh b/src/cpu/testers/synchrotrace/st_parser.hh
new file mode 100644
--- /dev/null
+++ b/src/cpu/testers/synchrotrace/st_parser.hh
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2015-2016 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be
construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Copyright (c) 2015, Drexel University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Karthik Sangaiah & Ankit More + * + * Parses Sigil event trace files and Sigil Pthread file + */ + +#ifndef __CPU_TESTERS_SYNCHROTRACE_STPARSER_HH +#define __CPU_TESTERS_SYNCHROTRACE_STPARSER_HH + +#include + +#include +#include +#include +#include +#include + +#include "st_event.hh" + +/** + * The STParser reads the trace files from disk and parses them line by + * line to create an STEvent per line item. The line fields are parsed by + * delimiter chars like comma, asterix and hash. 
+ */ +class STParser +{ + public: + // Request size type + typedef uint8_t requestSize; + + // The maximum size in bytes of a request being issued to memory + static const requestSize maxRequestSize = 8; + + private: + + // Block size type read from file + typedef uint16_t FileBlockSize; + + // Typedef for specifying number of entries + typedef uint8_t numEntries; + + // Size of the block to be read from the file in one go + const FileBlockSize readBlock = 1000; + + // Threshold for filling up event queue + const size_t minEventsSize = 100; + + // Number of comp event entries + numEntries compEntries = 6; + + // Number of inter thread communication event entries + numEntries commEntries = 2; + + // Number of shared data read entries + numEntries commSharedInfoEntries = 4; + + // Number of local write event entries + numEntries compWriteEntries = 2; + + // Number of local read event entries + numEntries compReadEntries = 2; + + // Number of pthread event entries + numEntries pthreadEntries = 3; + + // Pthread tag + const std::string pthreadTag = "pth_ty"; + + // Initial book keeping + + /** Parses Sigil Pthread file for Pthread meta-data */ + void processPthreadFile(); + + /** Creates pthread address to thread ID map */ + void processAddressToID(std::string line, size_t hash_pos); + + /** Creates set of barriers used in application */ + void processBarrierEvent(std::string line, size_t star_pos); + + /** Initializes sigil trace and output file pointers */ + void initSigilFilePointers(); + + /** Read event from Sigil event file and determine event type */ + void readEventFile(ThreadID thread_id); + + /** + * Parse communication event and add to event map. Communication events + * represent RAW dependency between threads. Parse dependencies and + * corresponding memory reads. Format modified virtual memory addresses + * into requests for the memory system. 
+ */ + void processCommEvent(std::string this_event, size_t hash_pos); + + /** + * Parse computation events and add to event map. Computation events + * represent abstract form of many integer/floating point operations + * between loads and stores. Default: 1 LD/ST per computation event. For + * compression: variable number of LD/STs per computation event. Format + * modified virtual memory addresses into requests for the memory system. + */ + void processCompEvent(std::string this_event); + + /** Parse beginning of computation event */ + void processCompMainEvent(std::string this_event, STEvent *new_event); + + /** Parse write protion of computation event */ + void processCompWriteEvent(std::string dependency_info, + STEvent *new_event); + + /** Parse read portion of computation event */ + void processCompReadEvent(std::string dependency_info, STEvent *new_event); + + /** Parse synchronization event and add to event map. */ + void processPthreadEvent(std::string this_event, size_t caret_pos); + + // Pthread Meta-data + + /** Map converting each slave thread's pthread address to thread ID */ + std::map addressToIDMap; + + /** Holds barriers used in application */ + std::map> barrierMap; + + /** Number of threads set from the cmd line */ + int numThreads; + + // File Directories + /** Directory of Sigil Traces and Pthread metadata file */ + std::string eventDir; + + /** Directory of output files */ + std::string outputDir; + + /** Block size in bytes */ + int blockBytes; + + /** Block size in bits */ + int blockBits; + + /** Main Memory size in bytes */ + uint64_t memoryBytes; + + /** Main Memory size in terms of number of cache lines */ + uint64_t numCacheLines; + + /** Pthread meta-data file pointer */ + std::ifstream pthreadFilePointer; + + /** Sigil trace file pointers */ + std::vector inputFilePointer; + + /** Output file pointers */ + std::vector outputFilePointer; + + public: + /** Return map of Pthread addresses/Thread ID */ + std::map getAddressToIDMap() { 
+ return addressToIDMap; + }; + + /** Return map of barriers */ + std::map> getBarrierMap() { + return barrierMap; + }; + + /** Return sigil trace file pointers */ + std::vector getInputFilePointer() { + return inputFilePointer; + }; + + /** Return output file pointers */ + std::vector getOutputFilePointer() { + return outputFilePointer; + }; + + /** + * Central event map: list of pointers to event deques of each thread. + * Each deque reads in 1000 events each time the deque size falls to + * 100 events. + */ + std::deque **eventMap; + + /** Parse initial events for eventMap */ + void generateEventQueue(); + + /** + * Add more events into the eventMap when eventMap size for a particular + * thread falls under 100 events. + */ + void replenishEvents(ThreadID thread_id); + + /** Default Parser Constructor */ + STParser(int num_threads, std::deque **event_map, + std::string event_dir, std::string output_dir, + uint64_t mem_size_bytes, int block_size_bytes, + int block_size_bits) { + numThreads = num_threads; + eventMap = event_map; + eventDir = event_dir; + outputDir = output_dir; + memoryBytes = mem_size_bytes; + blockBytes = block_size_bytes; + blockBits = block_size_bits; + processPthreadFile(); + initSigilFilePointers(); + numCacheLines = mem_size_bytes / block_size_bytes; + } + + ~STParser() { + } +}; +#endif // __CPU_TESTERS_SYNCHROTRACE_STPARSER_HH diff --git a/src/cpu/testers/synchrotrace/st_parser.cc b/src/cpu/testers/synchrotrace/st_parser.cc new file mode 100644 --- /dev/null +++ b/src/cpu/testers/synchrotrace/st_parser.cc @@ -0,0 +1,588 @@ +/* + * Copyright (c) 2015-2016 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Copyright (c) 2015, Drexel University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Karthik Sangaiah & Ankit More + * + * Parses Sigil event trace files and Sigil Pthread file + */ + +#include "st_parser.hh" + +using namespace std; + +void +STParser::processPthreadFile() +{ + + std::string fname_str = csprintf("%s/sigil.pthread.out", + eventDir.c_str()); + char *fname = strdup(fname_str.c_str()); + pthreadFilePointer.open(fname); + if (!pthreadFilePointer.is_open()) { + panic("Failed to open pthread file"); + } + free(fname); + while (pthreadFilePointer.good() && !pthreadFilePointer.eof()) { + string pthread_file_line; + if (getline(pthreadFilePointer, pthread_file_line)) { + size_t hash_pos = pthread_file_line.rfind('#'); + size_t star_pos = pthread_file_line.rfind('*'); + + if (hash_pos != string::npos) + processAddressToID(pthread_file_line, hash_pos); + else + processBarrierEvent(pthread_file_line, star_pos); + } else + break; + } + pthreadFilePointer.close(); +} + +void +STParser::processAddressToID(string line, size_t hash_pos){ + string thread_addr_line = line.substr(hash_pos + 1); + size_t comma_pos = thread_addr_line.find(','); + string thread_address_string = thread_addr_line.substr(0, comma_pos); + string thread_id_string = 
thread_addr_line.substr(comma_pos + 1); + Addr thread_address = strtoul(thread_address_string.c_str(), NULL, 0); + ThreadID thread_id = strtol(thread_id_string.c_str(), NULL, 0); + assert(thread_id >= 0); + + // Keep Thread IDs in a map + addressToIDMap[thread_address] = thread_id - 1; +} + +void +STParser::processBarrierEvent(string line, size_t star_pos){ + string barrier_line = line.substr(star_pos + 1); + size_t comma_pos = barrier_line.find(','); + size_t old_pos = 0; + string barrier_address_string = barrier_line.substr(0, comma_pos); + string tidlist = barrier_line.substr(comma_pos + 1); + comma_pos = tidlist.find(','); + + set thread_ids; + + while (comma_pos != string::npos) { + ThreadID thread_id = strtol(tidlist.substr(old_pos, comma_pos).c_str(), + NULL, 0) - 1; + assert(thread_id >= 0); + thread_ids.insert(thread_id); + old_pos = comma_pos + 1; + comma_pos = tidlist.find(',', comma_pos + 1); + } + + Addr barrier_address = strtoul(barrier_address_string.c_str(), NULL, 0); + barrierMap[barrier_address] = thread_ids; +} + +void +STParser::initSigilFilePointers() +{ + inputFilePointer.resize(numThreads); + outputFilePointer.resize(numThreads); + + for (int i = 0; i < numThreads; i++) { + std::string fname_str = csprintf("%s/sigil.events.out-%d.gz", + eventDir.c_str(), i + 1); + char *fname = strdup(fname_str.c_str()); + + inputFilePointer[i] = new gzifstream(fname); + if (inputFilePointer[i]->fail()) { + panic("Failed to open file: %s\n", fname); + } + + fname_str = csprintf("%s/eventTimeOutput-%d.csv.gz", + outputDir.c_str(), i + 1); + fname = strdup(fname_str.c_str()); + + outputFilePointer[i] = new gzofstream(fname); + if (outputFilePointer[i]->fail()) { + panic("ERROR!: Not able to create event file %s. 
Aborting!!\n", + fname); + } + free(fname); + } +} + +void +STParser::generateEventQueue(){ + for (int i = 0; i < numThreads; i++) { + readEventFile(i); + } +} + +void +STParser::replenishEvents(ThreadID thread_id) { + if (eventMap[thread_id]->size() < minEventsSize) + readEventFile(thread_id); +} + +void +STParser::readEventFile(ThreadID thread_id) +{ + string this_event; + + for (FileBlockSize count = 0; count < readBlock; count++) { + if (!getline(*(inputFilePointer[thread_id]), this_event)) { + break; + } else { + size_t hash_pos = this_event.find('#'); + size_t caret_pos = this_event.find('^'); + + if (hash_pos != string::npos) { + processCommEvent(this_event, hash_pos); + } else if (caret_pos != string::npos){ + processPthreadEvent(this_event, caret_pos); + } else { + processCompEvent(this_event); + } + } + } +} + +void +STParser::processCommEvent(string this_event, size_t hash_pos) +{ + string event_info; + + event_info = this_event.substr(0, hash_pos - 1); + STEvent *new_event = new STEvent(); + new_event->subEventList = NULL; + new_event->eventClass = STEvent::COMM; + + size_t cur_pos = -1; + for (numEntries i = 0; i < commEntries; i++) { + size_t next_pos = event_info.find(',', cur_pos + 1); + string this_entry = event_info.substr(cur_pos + 1, + next_pos - (cur_pos + 1)); + cur_pos = next_pos; + + switch(i) { + case 0: + new_event->eventID = strtoul(this_entry.c_str(), NULL, 0); + break; + case 1: + new_event->evThreadID = atoi(this_entry.c_str()) - 1; + break; + default: + panic("Comm event entry crossed max\n"); + } + } + + string dependency_info = this_event.substr(hash_pos + 2); + size_t cur_hash_pos = -2; + + // Parse dependency information of Communication Event + do { + size_t next_hash_pos = dependency_info.find('#', cur_hash_pos + 2); + string dep_info = dependency_info.substr(cur_hash_pos + 2, + (next_hash_pos - 1) - (cur_hash_pos + 2)); + cur_hash_pos = next_hash_pos; + + ThreadID thread_id = 0; + unsigned long event_id = 0; + unsigned long 
mem_start = 0; + unsigned long mem_end = 0; + size_t cur_space_pos = -1; + for (numEntries i = 0; i < commSharedInfoEntries; i++) { + size_t next_space_pos = dep_info.find(' ', + cur_space_pos + 1); + string this_entry = dep_info.substr(cur_space_pos + 1, + next_space_pos - (cur_space_pos + 1)); + cur_space_pos = next_space_pos; + + switch(i) { + case 0: + thread_id = atoi(this_entry.c_str()) - 1; + assert(thread_id >= 0); + break; + case 1: + event_id = strtoul(this_entry.c_str(), NULL, 0); + break; + case 2: + mem_start = strtoul(this_entry.c_str(), NULL, 0); + break; + case 3: + mem_end = strtoul(this_entry.c_str(), NULL, 0); + break; + default: + panic("Shared comm event entry crossed max\n"); + } + } + + // Convert program virtual address to modified virtual for the address + // space and +1 to offset the count of the start_range + requestSize num_bytes = (uint64_t)(mem_end - mem_start + 1); + uint64_t line_start = (uint64_t)((mem_start/blockBytes) + % numCacheLines); + uint64_t start_offset = (uint64_t)(mem_start % blockBytes); + uint64_t cur_add = (line_start << blockBits) | start_offset; + + // Split up each of the memory requests for subEvents + requestSize prev_req_size = 0; + do { + requestSize this_req_size; + + if (num_bytes >= maxRequestSize) + this_req_size = maxRequestSize; + else + this_req_size = num_bytes; + cur_add = (cur_add + prev_req_size) % memoryBytes; + + int residual = this_req_size - (int)(blockBytes - + (int)(cur_add % blockBytes)); + + if (residual > 0) { + // split it between 2 lines + MemAddrInfo *this_line = new MemAddrInfo( + thread_id, event_id, cur_add, this_req_size - residual); + new_event->commPreRequisiteEvents.push_back(this_line); + prev_req_size = (uint64_t)(this_req_size - residual); + } else { + MemAddrInfo *this_line = new MemAddrInfo( + thread_id, event_id, cur_add, this_req_size); + new_event->commPreRequisiteEvents.push_back(this_line); + prev_req_size = this_req_size; + } + num_bytes -= prev_req_size; + } while 
(num_bytes > 0); + } while (cur_hash_pos != string::npos); + + if (new_event->evThreadID > numThreads || new_event->evThreadID < 0) { + panic("ThreadID bad! Check number of threads in configuration\n"); + } + eventMap[new_event->evThreadID]->push_back(new_event); +} + +void +STParser::processCompEvent(string this_event) +{ + size_t dollar_pos = this_event.find('$'); + size_t star_pos = this_event.find('*'); + + STEvent *new_event = new STEvent(); + new_event->subEventList = NULL; + new_event->eventClass = STEvent::COMP; + + string event_info; + string write_event_info; + string read_event_info; + + string remainder_str = this_event; + + // Reads + if (star_pos != string::npos) { + read_event_info = remainder_str.substr(star_pos + 2); + remainder_str = remainder_str.substr(0, star_pos - 1); + } + // Writes + if (dollar_pos != string::npos) { + write_event_info = remainder_str.substr(dollar_pos + 2); + remainder_str = remainder_str.substr(0, dollar_pos - 1); + } + + event_info = remainder_str; + processCompMainEvent(event_info, new_event); + + if (dollar_pos != string::npos) + processCompWriteEvent(write_event_info, new_event); + + if (star_pos != string::npos) + processCompReadEvent(read_event_info, new_event); + + if (new_event->evThreadID > numThreads || new_event->evThreadID < 0) { + panic("ThreadID bad! Check number of threads in configuration\n"); + } + eventMap[new_event->evThreadID]->push_back(new_event); +} + +void +STParser::processCompMainEvent(string this_event, STEvent *new_event) +{ + int element_count = count(this_event.begin(), this_event.end(), ','); + + if (element_count < (compEntries - 1)) { + panic("Incorrect element count in computation event." 
+ " Number of elements: %d\n", element_count); + } + size_t cur_pos = -1; + for (numEntries i = 0; i < compEntries; i++) { + size_t next_pos = this_event.find(',', cur_pos + 1); + string this_entry = this_event.substr(cur_pos + 1, + next_pos - (cur_pos + 1)); + cur_pos = next_pos; + + switch(i) { + case 0: + new_event->eventID = strtoul(this_entry.c_str(), NULL, 0); + break; + case 1: + new_event->evThreadID = atoi(this_entry.c_str()) - 1; + break; + case 2: + new_event->compIOPS = strtoul(this_entry.c_str(), NULL, 0); + break; + case 3: + new_event->compFLOPS = strtoul(this_entry.c_str(), NULL, 0); + break; + case 4: + new_event->compMemReads = strtoul(this_entry.c_str(), + NULL, 0); + break; + case 5: + new_event->compMemWrites = strtoul(this_entry.c_str(), + NULL, 0); + break; + default: + panic("Comp entry crossed expected max entry count\n"); + break; + } + } +} + +void +STParser::processCompWriteEvent(string dependency_info, STEvent *new_event) +{ + size_t curr_pos = -2; + do { + size_t next_pos = dependency_info.find('$', curr_pos + 2); + string dep_info = dependency_info.substr(curr_pos + 2, + (next_pos - 1) - (curr_pos + 2)); + curr_pos = next_pos; + + unsigned long mem_start = 0; + unsigned long mem_end = 0; + size_t cur_space_pos = -1; + for (numEntries i = 0; i < compWriteEntries; i++) { + size_t next_space_pos = dep_info.find(' ', + cur_space_pos + 1); + string this_entry = dep_info.substr(cur_space_pos + 1, + next_space_pos - cur_space_pos + 1); + cur_space_pos = next_space_pos; + + switch(i) { + case 0: + mem_start = strtoul(this_entry.c_str(), NULL, 0); + break; + case 1: + mem_end = strtoul(this_entry.c_str(), NULL, 0); + break; + default: + panic("Comp write entry crossed expected max\n"); + break; + } + } + + vector this_comp_write_event; + + // Convert program virtual address to modified virtual for the address + // space and +1 to offset the count of the start_range + requestSize num_bytes = (uint64_t)(mem_end - mem_start + 1); + + uint64_t 
line_start = (uint64_t)((mem_start/blockBytes) + % numCacheLines); + uint64_t start_offset = (uint64_t)(mem_start + % blockBytes); + uint64_t cur_add = (line_start << blockBits) | start_offset; + + // Split up each of the memory requests for subEvents + requestSize prev_req_size = 0; + do { + requestSize this_req_size; + + this_req_size = (num_bytes >= maxRequestSize ? maxRequestSize : + num_bytes); + cur_add = (cur_add + prev_req_size) % memoryBytes; + + int residual = this_req_size - (int)(blockBytes - + (int)(cur_add % blockBytes)); + + if (residual > 0) { + // Split it between 2 lines + MemAddrInfo *this_line = new MemAddrInfo( + 0, 0, cur_add, this_req_size - residual); + new_event->compWriteEvents.push_back(this_line); + prev_req_size = (uint64_t)(this_req_size - residual); + } else { + MemAddrInfo *this_line = new MemAddrInfo( + 0, 0, cur_add, this_req_size); + new_event->compWriteEvents.push_back(this_line); + prev_req_size = this_req_size; + } + num_bytes -= prev_req_size; + } while (num_bytes > 0); + } while (curr_pos != string::npos); +} + +void +STParser::processCompReadEvent(string dependency_info, STEvent *new_event) +{ + size_t cur_star_pos = -2; + do { + size_t nextStar_Pos = dependency_info.find('*', cur_star_pos + 2); + string dep_info = dependency_info.substr(cur_star_pos + 2, + (nextStar_Pos - 1) - (cur_star_pos + 2)); + cur_star_pos = nextStar_Pos; + + unsigned long mem_start = 0; + unsigned long mem_end = 0; + size_t cur_space_pos = -1; + for (numEntries i = 0; i < compReadEntries; i++) { + size_t next_space_pos = dep_info.find(' ', cur_space_pos + 1); + string this_entry = dep_info.substr(cur_space_pos + 1, + next_space_pos - + cur_space_pos + 1); + cur_space_pos = next_space_pos; + + switch(i) { + case 0: + mem_start = strtoul(this_entry.c_str(), NULL, 0); + break; + case 1: + mem_end = strtoul(this_entry.c_str(), NULL, 0); + break; + default: + panic("Comp read entry crossed expected max\n"); + break; + } + } + + vector this_comp_readEvent; 
+ // Convert program virtual address to modified virtual for the + // address space + requestSize num_bytes = (uint64_t)(mem_end - + mem_start + 1); + // +1 to offset the count of the start_range + uint64_t line_start = (uint64_t)((mem_start/blockBytes) + % numCacheLines); + uint64_t start_offset = (uint64_t)(mem_start + % blockBytes); + uint64_t cur_add = (line_start << blockBits) | start_offset; + + // Split up each of the memory requests for subEvents + requestSize prev_req_size = 0; + do { + requestSize this_req_size; + + if (num_bytes >= maxRequestSize) + this_req_size = maxRequestSize; + else + this_req_size = num_bytes; + cur_add = (cur_add + prev_req_size) % memoryBytes; + + int residual = this_req_size - (int)(blockBytes - + (int)(cur_add % blockBytes)); + + if (residual > 0) { + // Split it between 2 lines + MemAddrInfo *this_line = new MemAddrInfo( + 0, 0, cur_add, this_req_size - residual); + new_event->compReadEvents.push_back(this_line); + prev_req_size = (uint64_t)(this_req_size - residual); + } else { + MemAddrInfo *this_line = new MemAddrInfo( + 0, 0, cur_add, this_req_size); + new_event->compReadEvents.push_back(this_line); + prev_req_size = this_req_size; + } + num_bytes -= prev_req_size; + } while (num_bytes > 0); + } while (cur_star_pos != string::npos); +} + +void +STParser::processPthreadEvent(string this_event, size_t caret_pos) +{ + string event_info = this_event.substr(0, caret_pos - 1); + STEvent *new_event = new STEvent(); + new_event->subEventList = NULL; + new_event->eventClass = STEvent::THREAD_API; + + string tmp_str; + size_t cur_pos = -1, next_pos = -1; + + for (numEntries i = 0; i < pthreadEntries; i++) { + next_pos = event_info.find(',', cur_pos + 1); + tmp_str = event_info.substr(cur_pos + 1, next_pos - (cur_pos + 1)); + cur_pos = next_pos; + + switch (i) { + case 0: + new_event->eventID = strtoul(tmp_str.c_str(), NULL, 0); + break; + case 1: + new_event->evThreadID = strtoul(tmp_str.c_str(), NULL, 0) - 1; + break; + case 2: + 
assert(next_pos == string::npos); + next_pos = tmp_str.find(':'); + assert(tmp_str.substr(0, next_pos) == pthreadTag); + new_event->eventType = (STEvent::EventType) + strtol(tmp_str.substr(next_pos + 1, string::npos).c_str(), + NULL, 0); + break; + default: + panic("phtread event entry crossed expected max\n"); + } + } + + new_event->pthAddr = + strtoul(this_event.substr(caret_pos + 2, string::npos).c_str(), + NULL, 0); + + if (new_event->evThreadID > numThreads || new_event->evThreadID < 0) + panic("ThreadID bad! Check number of threads in configuration\n"); + eventMap[new_event->evThreadID]->push_back(new_event); +} diff --git a/src/cpu/testers/synchrotrace/synchro_trace.hh b/src/cpu/testers/synchrotrace/synchro_trace.hh new file mode 100644 --- /dev/null +++ b/src/cpu/testers/synchrotrace/synchro_trace.hh @@ -0,0 +1,484 @@ +/* + * Copyright (c) 2015-2016 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Copyright (c) 2015, Drexel University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Karthik Sangaiah + * Ankit More + * Radhika Jagtap + */ + +#ifndef __CPU_TESTERS_SYNCHROTRACE_SYNCHROTRACE_HH +#define __CPU_TESTERS_SYNCHROTRACE_SYNCHROTRACE_HH + +#include +#include +#include +#include +#include +#include +#include + +#include "base/intmath.hh" +#include "base/misc.hh" +#include "cpu/testers/synchrotrace/st_event.hh" +#include "cpu/testers/synchrotrace/st_parser.hh" +#include "debug/ROI.hh" +#include "debug/STDebug.hh" +#include "debug/STEventPrint.hh" +#include "debug/STIntervalPrint.hh" +#include "debug/STMutexLogger.hh" +#include "mem/mem_object.hh" +#include "mem/packet.hh" +#include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/DataBlock.hh" +#include "mem/ruby/common/SubBlock.hh" +#include "mem/ruby/system/RubyPort.hh" +#include "mem/ruby/system/RubySystem.hh" +#include "params/SynchroTrace.hh" +#include "sim/system.hh" + +/* SynchroTrace's trace replay is a 1-CPI timing model that interfaces with + * the multi-threaded traces from Sigil to inject traffic into the detailed + * memory model. + * + * The capture tool, Sigil, leverages the Valgrind dynamic binary + * instrumentation tool. The processed instructions from the native + * multi-threaded applications are abstracted into (3) events: + * Computation (Work performed local to a thread), Communication (Read/Write + * dependencies between threads), and Synchronization (embedded pthread calls + * for each thread). 
These events form a trace for each individual thread, + * so that these threads may progress in parallel when replaying the traces. + * + * SynchroTrace's trace replay is comprised of one tester module of type + * MemObject and contains the contexts of each individual cores. Each core's + * events are scheduled into the global event queue as + * 'coreEvents[processor]'. The replay module connects each core context to + * each of the ports of the private L1s of the memory model. In addition to + * the core contexts, a back-end simulation thread runs periodically to + * replenish the events data structure for each thread and check if the + * simulation has been completed. + * + * Each line of the traces are parsed (by STParser) into events, which are + * contained in 'eventMap'. 'eventMap' is a list of pointers to each thread's + * deque of events. As the events of the traces are potentially comprised of + * multiple memory requests, events are broken up into sub events for each + * request. These sub events hold a divided portion of the compute ops, as + * well as one request. + * + * The normal execution flow of the replay module is as follows: + * 1) Processor wakes up + * 2) Sub events are generated for the top event of the thread + * 3) Based on the compute ops for that sub event, a trigger time is + * scheduled to wakeup the core sometime in the future. The timing for the + * compute ops are configurable via the command line options 'cpi_iops' and + * 'cpi_flops'. + * 4) When the sub event's trigger time has been reached, a read or write + * request is sent out to the memory system and the core essentially blocks. + * 5) Eventually the memory request returns as a "hitCallback" and the next + * sub event or event is scheduled. + * + * In the case of synchronization events: + * 1) Once the master thread obtains a "create", it issues a wakeup to the + * slave thread. The master thread will also wait on a "join". 
+ * 2) Threads can attempt to obtain or release mutex locks and are held in + * barrier fences to preserve the native synchronization behavior. + * + * In the case of communication events, the replay module will enforce + * producer->consumer dependencies by forcing the consumer thread to wait on + * the producer thread reaching the dependent event. After the producer + * thread has reached the dependent event, the consumer thread will then send + * its memory read request to the memory system. + * + * The SynchroTrace replay module handles the scheduling of multiple + * threads per core. After each event is completed, the replay module checks + * if there is an available thread on that core to schedule and swaps + * accordingly. + * + */ + +class SynchroTrace : public MemObject +{ + public: + + class CpuPort : public MasterPort + { + private: + + SynchroTrace *tester; + + public: + + CpuPort(const std::string &_name, SynchroTrace *_tester, + PortID _id) + : MasterPort(_name, _tester, _id), tester(_tester) + {} + + protected: + + /** + * Receive memory request response from the memory system to the + * SynchroTrace CPU port + */ + virtual bool recvTimingResp(PacketPtr pkt); + + // TODO - Implement retry mechanism for classic memory model + virtual void recvRetry() { + panic("%s does not expect a retry\n", name()); + } + void recvTimingSnoopReq(PacketPtr pkt) + {} + virtual void recvReqRetry() { + panic("%s does not expect a retry\n", name()); + } + }; + + typedef SynchroTraceParams Params; + SynchroTrace(const Params *p); + ~SynchroTrace(); + + /** Used to get a reference to the master port. */ + virtual BaseMasterPort &getMasterPort(const std::string &if_name, + PortID idx = InvalidPortID); + + /** Used to get a reference to the CPU port. */ + MasterPort *getCpuPort(int idx); + + virtual void init(); + + /** Wake up the back-end simulation thread. */ + void wakeup(); + + /** Wake up the core. 
*/ + void wakeup(int proc_id); + + protected: + class SynchroTraceStartEvent : public Event + { + private: + + SynchroTrace *tester; + + public: + + SynchroTraceStartEvent(SynchroTrace *_tester) + : Event(CPU_Tick_Pri), tester(_tester) + {} + /** Waking up back-end simulation thread. */ + void process() { tester->wakeup(); } + virtual const char *description() const { + return "SynchroTrace tick"; + } + }; + + class SynchroTraceCoreEvent : public Event + { + private: + + SynchroTrace *tester; + int procID; + + public: + + SynchroTraceCoreEvent(SynchroTrace *_tester, int _procID) + : Event(CPU_Tick_Pri), tester(_tester), procID(_procID) + {} + /** Waking up cores. */ + void process() { tester->wakeup(procID); } + virtual const char *description() const { + return "Core event tick"; + } + }; + + /** Waking up back-end simulation thread. */ + SynchroTraceStartEvent synchroTraceStartEvent; + + /** Waking up cores. */ + std::vector coreEvents; + + MasterID masterID; + + private: + + enum LocalMemAccessType { + READ, + WRITE, + }; + + + // Python params passed to SynchroTrace object + + /** Vector of ports for CPU to memory system */ + std::vector ports; + + /** Number of CPUs set from the cmd line */ + int numCpus; + + /** Number of threads set from the cmd line */ + int numThreads; + + /** Directory of Sigil Traces and Pthread metadata file */ + std::string eventDir; + + /** Directory of output files */ + std::string outDir; + + /** Back-end simulator thread frequency */ + int wakeupFreq; + + /** Option to use Ruby */ + bool useRuby; + + /** Block Size */ + int m_block_size_bytes; + + /** Parser Object */ + STParser *parser; + + const uint16_t printCount; + + /** + * Counter used for 'STIntervalPrint' to print event progression after + * N core wakeups + */ + int printThreadEventCounters; + + /** + * Flag used for 'ROI' to print out entering and leaving the parallel + * region + */ + bool roiFlag; + + /** + * Counter used for 'ROI' to print out entering and leaving the 
parallel + * region + */ + int workerThreadCount; + + /** Abstract cpi estimation for integer ops */ + float CPI_IOPS; + + /** Abstract cpi estimation for floating point ops */ + float CPI_FLOPS; + + /** Flag to skip Producer -> Consumer dependencies */ + bool pcSkip; + + /** Block size in bytes */ + int blockSizeBytes; + + /** Block size in bits*/ + int blockSizeBits; + + /** Memory Size in bytes */ + uint64_t memorySizeBytes; + + /** + * Central event map: list of pointers to event deques of each thread. + * Each deque reads in 1000 events each time the deque size falls to + * 100 events. + */ + std::deque **eventMap; + + /** Vector of cores' threads */ + std::vector> threadMap; + + /** Vector of threads' statuses, i.e. whether they are active */ + std::vector threadStartedMap; + + /** Holds which threads currently possess a mutex lock */ + std::vector threadMutexMap; + + /** Holds if thread can proceed past a barrier */ + std::vector threadContMap; + + /** Holds mutex locks in use */ + std::set mutexLocks; + + /** Holds spin locks in use */ + std::set spinLocks; + + /** Map converting each slave thread's pthread address to thread ID */ + std::map addresstoIDMap; + + /** Holds barriers used in application */ + std::map> barrierMap; + + /** Holds which threads are waiting for a barrier */ + std::map> threadWaitMap; + + /** Sigil trace file pointer */ + std::vector inputFilePointer; + + /** Output file pointers */ + std::vector outputFilePointer; + + /** + * Number of threads (if less than number of cores). Otherwise, + * number of cores. + */ + int numContexts; + + /** Private copy constructor */ + SynchroTrace(const SynchroTrace &obj); + + /** Private assignment operator */ + SynchroTrace& operator=(const SynchroTrace &obj); + + /** Initialize debug flags and mutex lock/barrier maps */ + void initStats(); + + /** + * Map threads to cores. Currently implemented as a round robin mapping + * of threads to cores. 
+ */ + void initialThreadMapping(); + + /** + * Check of master thread has completed its events and issue an exit + * call back. + */ + void checkCompletion(); + + /** + * Debug functions to view progress, prints event id for every thread + * at 50k wakeups. + */ + void printThreadEvents(); + + /** + * Print Event id for specific thread before/after event is + * loaded/completed. + * + * @param thread_id thread ID of event + * @param is_end set to true if this event has completed + */ + void printEvent(ThreadID thread_id, bool is_end); + + /** + * Break up events into sub events comprised of a division of the + * compute ops and one memory request. The sub events are placed in + * a sub event list for each event. Creating sub events only occurs + * prior to an events actual processing as to reduce the total amount + * of memory usage in simulation (compared to creating a sub event for + * every event in the eventMap). + */ + void createSubEvents(int proc_id, bool event_id_passed = false, + ThreadID event_thread_id = 0); + + /** Handle synchronization progress for each of the threads. */ + void progressPthreadEvent(STEvent *this_event, int proc_id); + + /** + * Handles progression of each thread depending on event type and + * status. + */ + void progressEvents(int proc_id); + + /** + * Swapping of threads within a core is allowed if: + * The head event in the currently allocated thread has finished an event + * and one of the other threads is ready. + * + * The swapThreads() function is called every time an event finishes. + */ + void swapThreads(int proc_id); + + /** + * Function is called on core wakeups to prevent cores from stalling + * when scheduling multiple threads per core. + */ + void swapStalledThreads(int proc_id); + + /** + * Handles moving thread from top of thread queue on a core to the + * back. + */ + void moveThreadToHead(int proc_id, ThreadID thread_id); + + /** Handles selecting a read or a write when generating sub events. 
*/ + LocalMemAccessType memTypeToInsert(unsigned long loc_reads, + unsigned long loc_writes, + unsigned long max_loc_reads, + unsigned long max_loc_writes, + LocalMemAccessType type); + + /** Send a blocking message request to memory system. */ + void triggerMsg(int proc_id, ThreadID thread_id, + SubEvent *this_sub_event); + + /** Memory request returned! Queue up next event. */ + void hitCallback(NodeID proc, PacketPtr pkt); + + /** + * For a communication event, check to see if the producer has reached + * the dependent event. This function also handles the case of a system + * call. System calls are viewed as producer->consumer interactions with + * the 'producer' system call having a ThreadID of 30000. For obvious + * reasons, there are no 'dependencies' to enforce in the case of a system + * call. + */ + bool checkCommDependency(MemAddrInfo *comm_event, ThreadID thread_id); + + /** + * There can be mutiple Producer->Consumer dependencies within an event. + * This function calls checkCommDependency(...) for all + * producer->consumer dependencies. + */ + bool checkAllCommDependencies(STEvent *this_event); + + + /** Check if all necessary threads are in barrier. */ + bool checkBarriers(STEvent *this_event); + +}; +#endif // __CPU_TESTERS_SYNCHROTRACE_SYNCHROTRACE_HH diff --git a/src/cpu/testers/synchrotrace/synchro_trace.cc b/src/cpu/testers/synchrotrace/synchro_trace.cc new file mode 100644 --- /dev/null +++ b/src/cpu/testers/synchrotrace/synchro_trace.cc @@ -0,0 +1,962 @@ +/* + * Copyright (c) 2015-2016 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Copyright (c) 2015, Drexel University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Karthik Sangaiah + * Ankit More + * Radhika Jagtap + */ + +#include "sim/sim_exit.hh" +#include "synchro_trace.hh" + +using namespace std; + +SynchroTrace::SynchroTrace(const Params *p) + : MemObject(p), synchroTraceStartEvent(this), + masterID(p->system->getMasterId(name())), + numCpus(p->num_cpus), numThreads(p->num_threads), + eventDir(p->event_dir), outDir(p->output_dir), + wakeupFreq(p->master_wakeup_freq), + useRuby(p->ruby), + m_block_size_bytes(p->block_size_bytes), + printCount(50000), + CPI_IOPS(p->cpi_iops), + CPI_FLOPS(p->cpi_flops), + pcSkip(p->pc_skip), + memorySizeBytes(p->mem_size_bytes) +{ + // Initialize the SynchroTrace to Memory ports + for (int i = 0; i < p->num_cpus; ++i) { + ports.push_back(new CpuPort(csprintf("%s-port%d", name(), i), + this, i)); + } + assert(ports.size() > 0); +} + +SynchroTrace::~SynchroTrace() +{ + for (int i = 0; i < ports.size(); i++) + delete ports[i]; + + for (int i = 0; i < numContexts; i++) + delete coreEvents[i]; + + for (int i = 0; i < numThreads; i++) + delete eventMap[i]; + delete[] eventMap; +} + +void +SynchroTrace::init() +{ + assert(isPowerOf2(numCpus)); + + // Initialize memory params + if (useRuby) { 
+ blockSizeBytes = RubySystem::getBlockSizeBytes(); + blockSizeBits = RubySystem::getBlockSizeBits(); + } else { + blockSizeBytes = m_block_size_bytes; + blockSizeBits = floorLog2(blockSizeBytes); + } + + if (STParser::maxRequestSize > blockSizeBytes) + panic("Error in SynchroTrace!!: maxRequestSize " + " is greater than block size (%d)!!", + blockSizeBytes); + + // Initialize thread/cpu params + if (numThreads < numCpus) { + numContexts = numThreads; + } else { + numContexts = numCpus; + } + + // Initialize centralized event map + eventMap = new deque*[numThreads]; + for (int i = 0; i < numThreads; i++) + eventMap[i] = new deque; + + // Initiate thread maps and set master thread as active + threadMap.resize(numContexts); + threadStartedMap.resize(numThreads); + for (int i = 0; i < numThreads; i++) + threadStartedMap[i] = false; + threadStartedMap[0] = true; + + // Initiate event list per core + for (int i = 0; i < numContexts; i++) + coreEvents.push_back(new SynchroTraceCoreEvent(this, i)); + + // Initial scheduling of master simulator thread and cores + schedule(synchroTraceStartEvent, 1); + for (int i = 0; i < numContexts; i++) + schedule(*(coreEvents[i]), clockPeriod()); + + // Create parser for pthread metadata and Sigil trace events + parser = new STParser(numThreads, eventMap, eventDir, outDir, + memorySizeBytes, blockSizeBytes, blockSizeBits); + + // Parse Pthread Metadata + addresstoIDMap = parser->getAddressToIDMap(); + barrierMap = parser->getBarrierMap(); + + // Get Sigil trace and output file pointers + inputFilePointer = parser->getInputFilePointer(); + outputFilePointer = parser->getOutputFilePointer(); + + // Initialzie debug flags and mutex lock/barrier maps + initStats(); + + // Map threads to cores + initialThreadMapping(); + + // Parse the first set of events + parser->generateEventQueue(); +} + +void +SynchroTrace::initStats() +{ + // ThreadContMap to check if thread is finished with a barrier. 
+ threadContMap.resize(numThreads); + + // ThreadMutexMap to check if thread is holding a mutex lock. + threadMutexMap.resize(numThreads); + + for (int i = 0; i < numThreads; i++) { + threadContMap[i] = false; + threadMutexMap[i] = false; + } + + printThreadEventCounters = 0; + roiFlag = false; + workerThreadCount = 0; +} + +MasterPort* +SynchroTrace::getCpuPort(int idx) +{ + assert(idx >= 0 && idx < ports.size()); + return ports[idx]; +} + +BaseMasterPort & +SynchroTrace::getMasterPort(const std::string &if_name, PortID idx) +{ + if (if_name != "cpu_port") { + // pass it along to our super class + return MemObject::getMasterPort(if_name, idx); + } else { + if (idx >= static_cast(ports.size())) + panic("SynchroTrace::getMasterPort: unknown index %d\n", idx); + return *ports[idx]; + } +} + +bool +SynchroTrace::CpuPort::recvTimingResp(PacketPtr pkt) +{ + tester->hitCallback(id, pkt); + + // Only need timing of the memory request. + // No need for the actual data. + delete pkt->req; + delete pkt; + return true; +} + +void +SynchroTrace::wakeup() +{ + // Replenish each thread's events if depleted + for (ThreadID thread_id = 0; thread_id < numThreads; thread_id++) + parser->replenishEvents(thread_id); + + // Terminates the simulation after checking if all the events are done + checkCompletion(); + + // Schedule to keep this back-end simulation thread running + schedule(synchroTraceStartEvent, curTick() + clockPeriod() * wakeupFreq); +} + +void +SynchroTrace::wakeup(int proc_id) +{ + // For every 50k wakeup counts, print all the thread's EventID# + printThreadEvents(); + + // Create subevents for the thread at the head of the core list + // it is not necessary that this will happen always. 
+ // If the subevents have been created, simply skip + createSubEvents(proc_id); + + // Main progression of Event Queue + progressEvents(proc_id); + + // Swap threads in cores if allowed + swapStalledThreads(proc_id); +} + +void +SynchroTrace::printThreadEvents() +{ + if (printThreadEventCounters == printCount) { + for (int i = 0; i < numThreads; i++) { + if (!eventMap[i]->empty()) { + DPRINTF(STIntervalPrint, "Thread %d is on Event %d\n", + i, eventMap[i]->front()->eventID); + } + } + for (int i = 0; i < numCpus; i++) { + if (i < numThreads) { + DPRINTF(STIntervalPrint, "Thread %d is on top Core %d\n", + threadMap[i][0], i); + } else { + DPRINTF(STIntervalPrint, "No Thread is on top Core %d\n", i); + } + } + printThreadEventCounters = 0; + } else { + printThreadEventCounters++; + } +} + +void +SynchroTrace::printEvent(ThreadID thread_id, bool is_end) +{ + if (!eventMap[thread_id]->empty()) { + if (!is_end) { + DPRINTF(STEventPrint, "Starting %d, %d\n", + thread_id, eventMap[thread_id]->front()->eventID); + } else { + DPRINTF(STEventPrint, "Finished %d, %d\n", + thread_id, eventMap[thread_id]->front()->eventID); + } + } +} + +void +SynchroTrace::checkCompletion() +{ + // Termination condition: terminate when thread 0 is completed. 
+ if (eventMap[0]->empty()) { + for (int i = 0; i < numThreads; i++) { + if (inputFilePointer[i] != NULL) { + inputFilePointer[i]->close(); + delete inputFilePointer[i]; + } + if (outputFilePointer[i] != NULL) { + outputFilePointer[i]->close(); + delete outputFilePointer[i]; + } + } + exitSimLoop("SynchroTrace completed"); + } +} + +void +SynchroTrace::swapThreads(int proc_id) +{ + int num_threads = threadMap[proc_id].size(); + + // Check list of threads on the core to see if the thread is available + for (int i = 1; i < num_threads; i++) { + // Next thread on core + ThreadID top_thread_id = threadMap[proc_id][i]; + + // Thread is unavailable or completed its events, try next thread + if (!threadStartedMap[top_thread_id] || + eventMap[top_thread_id]->empty()) + continue; + + // Pull up next threads events + STEvent *topEvent = eventMap[top_thread_id]->front(); + + // Threads with Computation events and Pthread events + // can be swapped in always. + // + // Threads with Communication events are checked to see + // if all producer -> consumer dependencies have been met. + // + // Corner case: Consumer thread has a mutex while waiting + // for a dependency. We don't enforce this communication + // edge. + if (topEvent->eventClass == STEvent::COMP) { + moveThreadToHead(proc_id, i); + break; + } else if (topEvent->eventClass == STEvent::THREAD_API) { + moveThreadToHead(proc_id, i); + break; + // Communication Event + } else if (checkAllCommDependencies(topEvent) || + threadMutexMap[topEvent->evThreadID] || + pcSkip) { + moveThreadToHead(proc_id, i); + break; + } + } +} + +void +SynchroTrace::swapStalledThreads(int proc_id) +{ + ThreadID event_thread_id = threadMap[proc_id].front(); + // If current thread is completed or is unavailable, + // check to swap in next threads + if (eventMap[event_thread_id]->empty() || + !threadStartedMap[event_thread_id]) { + swapThreads(proc_id); + + // Scheduling next thread's core to wake up next cycle. 
+ if (!(coreEvents[proc_id]->scheduled())) { + schedule(*(coreEvents[proc_id]), curTick() + clockPeriod()); + } + return; + } + + // Swap if current thread is waiting on obtaining a mutex lock + STEvent *this_event = eventMap[event_thread_id]->front(); + if ((this_event->eventType == STEvent::MUTEX_LOCK) && + !threadMutexMap[this_event->evThreadID]) { + swapThreads(proc_id); + if (!(coreEvents[proc_id]->scheduled())) { + schedule(*(coreEvents[proc_id]), curTick()); + } + } +} + +void +SynchroTrace::moveThreadToHead(int proc_id, ThreadID thread_id) +{ + for (ThreadID i = 0; i < thread_id; i++) { + // Move the thread at the head to the end + threadMap[proc_id].push_back(threadMap[proc_id][0]); + threadMap[proc_id].erase(threadMap[proc_id].begin()); + } +} + +bool +SynchroTrace::checkAllCommDependencies(STEvent *this_event) +{ + assert(this_event->eventClass == STEvent::COMM); + bool check = true; + // Check all of the producer->consumer dependencies within the event + for (unsigned long i = 0; + i < this_event->commPreRequisiteEvents.size(); i++) { + check &= checkCommDependency(this_event->commPreRequisiteEvents[i], + this_event->evThreadID); + } + return check; +} + +bool +SynchroTrace::checkCommDependency(MemAddrInfo *comm_event, ThreadID thread_id) +{ + // This check is for OS-related traffic. 
+ // We indicate a communication event with the OS producer thread + // as having a ThreadID of 30000 + if (comm_event->reqThreadID == 30000) { + return true; + } + + // If the producer thread's eventID is greater than the dependent event + // then the dependency is satisfied + if (!eventMap[comm_event->reqThreadID]->empty()) { + if (eventMap[comm_event->reqThreadID]->front()->eventID > + comm_event->reqEventID) + return true; + else + return false; + } else { + return true; + } +} + +void +SynchroTrace::initialThreadMapping() +{ + // Currently map threads in a simple round robin fashion on cores + for (int i = 0; i < numThreads; i++) { + threadMap[i % numContexts].push_back(i); + } +} + +SynchroTrace* +SynchroTraceParams::create() +{ + return new SynchroTrace(this); +} + +void +SynchroTrace::hitCallback(NodeID proc_id, PacketPtr pkt) +{ + assert(proc_id < numContexts); + ThreadID event_thread_id = threadMap[proc_id].front(); + STEvent *this_event = eventMap[event_thread_id]->front(); + + if (!(this_event->subEventList->front().msgTriggered)) { + warn("%x: Message not triggered but received hitCallback:" + " nodeID: %d Event: %d\n", curTick(), proc_id, + this_event->eventID); + return; + } + + assert(this_event->subEventList->front().msgTriggered); + + // Sub event completed when memory request returns + this_event->subEventList->pop_front(); + + // If all sub events are completed, delete event. 
+ if (this_event->subEventList->empty()) { + printEvent(event_thread_id, true); // Print event completion + + // Delete Event + DPRINTF(STDebug, "Event %d completed for thread %d on core %d \n", + eventMap[event_thread_id]->front()->eventID, + event_thread_id, proc_id); + + STEvent *completed_event = eventMap[event_thread_id]->front(); + eventMap[event_thread_id]->pop_front(); + delete completed_event; + + // If multiple threads per core, check if we can swap threads + if (threadMap[proc_id].size() > 1) + swapThreads(proc_id); + + if (!eventMap[event_thread_id]->empty()) { + // Print: Event is starting + printEvent(event_thread_id, false); + + this_event = eventMap[event_thread_id]->front(); + parser->replenishEvents(this_event->evThreadID); + } + + // Schedule core to handle new event + schedule(*(coreEvents[proc_id]), curTick()); + } else { + // Event not completed - Pull up the next subevent and schedule + SubEvent *new_sub_event = &(this_event->subEventList->front()); + Tick comp_time = clockPeriod() * + (Cycles(CPI_IOPS * new_sub_event->numIOPS) + + Cycles(CPI_FLOPS * new_sub_event->numFLOPS)); + new_sub_event->triggerTime = comp_time + curTick() + clockPeriod(); + schedule(*(coreEvents[proc_id]), new_sub_event->triggerTime); + } +} + +void +SynchroTrace::triggerMsg(int proc_id, ThreadID thread_id, + SubEvent *this_sub_event) +{ + // Package memory request + assert (!eventMap[thread_id]->empty()); + + Addr addr; + addr = this_sub_event->thisMsg->addr; + + Request::Flags flags; + + Request *req = new Request(addr, this_sub_event->thisMsg->numBytes, + flags, masterID); + req->setContext(proc_id); + + Packet::Command cmd; + + if (this_sub_event->msgType == SubEvent::REQ_READ) + cmd = MemCmd::ReadReq; + else + cmd = MemCmd::WriteReq; + + PacketPtr pkt = new Packet(req, cmd); + uint8_t *dummy_data = new uint8_t; + *dummy_data = 0; + pkt->dataDynamic(dummy_data); + + DPRINTF(STDebug, "Trying to access Addr 0x%x\n", pkt->getAddr()); + + // Send memory request + if 
(ports[proc_id]->sendTimingReq(pkt)) { + this_sub_event->msgTriggered = true; + DPRINTF(STDebug, "%d: Message Triggered:" + " Core %d; Thread %d; Event %d; Subevents size %d;" + " Addr 0x%x\n", + curTick(), proc_id, thread_id, + eventMap[thread_id]->front()->eventID, + eventMap[thread_id]->front()->subEventList->size(), addr); + } else { + warn("%d: Packet did not issue from ProcID: %d, ThreadID: %d", + curTick(), proc_id, thread_id); + // If the packet did not issue, must delete! + // Note: No need to delete the data, the packet destructor + // will delete it + delete pkt->req; + delete pkt; + } +} + +void +SynchroTrace::progressEvents(int proc_id) +{ + ThreadID event_thread_id = threadMap[proc_id].front(); + + // Skip if thread hasn't been activated + if (!threadStartedMap[event_thread_id]) + return; + + // Skip when all events completed + if (eventMap[event_thread_id]->empty()) + return; + + STEvent *this_event = eventMap[event_thread_id]->front(); + SubEvent *top_sub_event = &(this_event->subEventList->front()); + + if (top_sub_event->triggerTime > curTick() || + top_sub_event->msgTriggered) { + // Schedule sub event + if (!(coreEvents[proc_id]->scheduled())) + schedule(*(coreEvents[proc_id]), top_sub_event->triggerTime); + return; + } + + + // The first subevent contains no messages indicating it is completed and + // we can pop it from the list. 
+ if (!top_sub_event->containsMsg) { + + if (this_event->eventClass != STEvent::THREAD_API) { + this_event->subEventList->pop_front(); + } else + progressPthreadEvent(this_event, proc_id); + + // Events with no remaining subevents are completed + // and we can pop it from the events list + if (this_event->subEventList->empty()) { + // Print: Event was just completed + printEvent(event_thread_id, true); + + // Delete event + DPRINTF(STDebug, "Event %d completed for thread %d on core %d \n", + eventMap[event_thread_id]->front()->eventID, + event_thread_id, proc_id); + + STEvent *completed_event = eventMap[event_thread_id]->front(); + eventMap[event_thread_id]->pop_front(); + delete completed_event; + + // If multiple threads per core, check if we can swap threads + if (threadMap[proc_id].size() > 1) + swapThreads(proc_id); + + if (!eventMap[event_thread_id]->empty()) { + // Print: Event is starting + printEvent(event_thread_id, false); + + this_event = eventMap[event_thread_id]->front(); + parser->replenishEvents(this_event->evThreadID); + } + + // Schedule core to handle new event + if (!(coreEvents[proc_id]->scheduled())) { + schedule(*(coreEvents[proc_id]), curTick()); + } + + DPRINTF(STDebug, "Core %d scheduled for %d\n", proc_id, curTick()); + + } else { + // Pull up the next subevent and schedule + SubEvent *new_sub_event = &(this_event->subEventList->front()); + Tick comp_time = clockPeriod() * + (Cycles(CPI_IOPS * new_sub_event->numIOPS) + + Cycles(CPI_FLOPS * new_sub_event->numFLOPS)); + new_sub_event->triggerTime = comp_time + curTick() + clockPeriod(); + schedule(*(coreEvents[proc_id]), new_sub_event->triggerTime); + DPRINTF(STDebug, "Core %d scheduled for %d\n", + proc_id, new_sub_event->triggerTime); + } + } else { + // The first first sub-event contains an unprocessed message so we + // attempt to create and trigger it here. + if (this_event->eventClass == STEvent::COMM) { + // Check if communication dependencies have been met. 
If yes, + // trigger the msg. If not, reschedule wakeup for next cycle. When + // consumer threads have a mutex lock, do not maintain the + // dependency as this could cause a deadlock. Could be user-level + // synchronization, and we do not want to maintain false + // dependencies. + if ((checkCommDependency(top_sub_event->thisMsg, event_thread_id) + || threadMutexMap[event_thread_id]) || pcSkip) { + triggerMsg(proc_id, event_thread_id, top_sub_event); + } else { + // Check if dependency met next cycle + top_sub_event->triggerTime = clockPeriod() * (curCycle() + + Cycles(1)); + schedule(*(coreEvents[proc_id]), top_sub_event->triggerTime); + } + } else { + // Send LD/ST for computation events + triggerMsg(proc_id, event_thread_id, top_sub_event); + } + } +} + +void +SynchroTrace::progressPthreadEvent(STEvent *this_event, int proc_id) +{ + assert(this_event); + bool consumed_event = false; + set::iterator mutex_ittr, spin_ittr; + + ThreadID slave_thread_id; + int slave_proc_id; + switch (this_event->eventType) { + case STEvent::MUTEX_LOCK: + if (mutexLocks.find(this_event->pthAddr) == mutexLocks.end()) { + mutexLocks.insert(this_event->pthAddr); + threadMutexMap[this_event->evThreadID] = true; + // Thread is now holding mutex lock. + + DPRINTF(STMutexLogger,"Thread %d locked mutex %d\n", + this_event->evThreadID, this_event->pthAddr); + consumed_event = true; + } + break; + + case STEvent::MUTEX_UNLOCK: + mutex_ittr = mutexLocks.find(this_event->pthAddr); + assert(mutex_ittr != mutexLocks.end()); + mutexLocks.erase(mutex_ittr); + threadMutexMap[this_event->evThreadID] = false; + // Thread returned mutex lock. 
+ + DPRINTF(STMutexLogger,"Thread %d unlocked mutex %d\n", + this_event->evThreadID, this_event->pthAddr); + consumed_event = true; + break; + + case STEvent::THREAD_CREATE: + if (!roiFlag) { + DPRINTF(ROI,"Reached parallel region.\n"); + roiFlag = true; + } + + workerThreadCount++; + slave_thread_id = addresstoIDMap[this_event->pthAddr]; + threadStartedMap[slave_thread_id] = true; + // Activated Slave Thread + consumed_event = true; + + // Wake up slave threads' cores + // TODO - '%numCpus' is used to obtain the slave thread's proc_id + // from the thread_id. This is only relevant for the default + // round-robin scheduling. + slave_proc_id = slave_thread_id % numCpus; + if (!(coreEvents[slave_proc_id]->scheduled())) { + schedule(*(coreEvents[slave_proc_id]), curTick() + clockPeriod()); + } + DPRINTF(STDebug, "Thread %d created \n", + addresstoIDMap[this_event->pthAddr]); + break; + + case STEvent::THREAD_JOIN: + assert(threadStartedMap[addresstoIDMap[this_event->pthAddr]]); + if (eventMap[addresstoIDMap[this_event->pthAddr]]->empty()) + consumed_event = true; + workerThreadCount--; + if (workerThreadCount == 0) + DPRINTF(ROI,"Last Thread Joined.\n"); + break; + + case STEvent::BARRIER_WAIT: + if (!threadContMap[this_event->evThreadID]) { + // Put thread in waitmap + if (threadWaitMap[this_event->pthAddr].find(this_event-> + evThreadID) + == threadWaitMap[this_event->pthAddr].end()) { + threadWaitMap[this_event->pthAddr].insert(this_event-> + evThreadID); + threadStartedMap[this_event->evThreadID] = false; + } + + if (checkBarriers(this_event)) { + set::iterator barr_ittr = + barrierMap[this_event->pthAddr].begin(); + for (; barr_ittr != barrierMap[this_event->pthAddr].end(); + barr_ittr++) { + threadStartedMap[*barr_ittr] = true; + threadContMap[*barr_ittr] = true; + } + threadWaitMap[this_event->pthAddr].clear(); + } + } else { + threadContMap[this_event->evThreadID] = false; + consumed_event = true; + } + + break; + + case STEvent::COND_WAIT: + //TODO: Need to 
add Condition Wait/Signal code here. + this_event->subEventList->pop_front(); + break; + + case STEvent::COND_SG: + this_event->subEventList->pop_front(); + break; + + case STEvent::SPIN_LOCK: + if (spinLocks.find(this_event->pthAddr) == spinLocks.end()) { + spinLocks.insert(this_event->pthAddr); + consumed_event = true; + } + break; + + case STEvent::SPIN_UNLOCK: + spin_ittr = spinLocks.find(this_event->pthAddr); + assert(spin_ittr != spinLocks.end()); + spinLocks.erase(spin_ittr); + consumed_event = true; + break; + + case STEvent::SEM_INIT: + case STEvent::SEM_WAIT: + case STEvent::SEM_POST: + case STEvent::SEM_GETV: + case STEvent::SEM_DEST: + default: + panic("Invalid pthread event enum value %i.\n", this_event->eventType); + } + + if (consumed_event) { + // Pthread 'dummy' sub event completed + this_event->subEventList->pop_front(); + } +} + +bool +SynchroTrace::checkBarriers(STEvent *this_event) +{ + // Check if this thread is the last one for the barrier by + // comparing the threads waiting on the barrier against the + // map of threads required in the barrier. 
+ set thread_wait_map_set; + set barrier_map_set; + set difference_set; + + thread_wait_map_set = threadWaitMap[this_event->pthAddr]; + barrier_map_set = barrierMap[this_event->pthAddr]; + set_difference(barrier_map_set.begin(), barrier_map_set.end(), + thread_wait_map_set.begin(), thread_wait_map_set.end(), + inserter(difference_set,difference_set.begin())); + if (difference_set.empty()){ + return true; + } else { + return false; + } +} + +void +SynchroTrace::createSubEvents(int proc_id, bool event_id_passed, + ThreadID event_thread_id) +{ + if (!event_id_passed) + event_thread_id = threadMap[proc_id].front(); + + // Skip when thread is inactive + if (!threadStartedMap[event_thread_id]) + return; + + // Skip when no events + if (eventMap[event_thread_id]->empty()) + return; + + STEvent *this_event = eventMap[event_thread_id]->front(); + if (!this_event->subEventsCreated) { + // For Pthread events, create 'dummy' sub event. + if (this_event->eventClass == STEvent::THREAD_API) { + this_event->subEventList = new deque; + SubEvent sub_event(0, 0, false, false); + this_event->subEventList->push_back(sub_event); + } else if (this_event->eventClass == STEvent::COMM) { + // For communication events, create read-based sub events + // for each dependency. 
+ this_event->subEventList = new deque; + + for (unsigned long j = 0; + j < this_event->commPreRequisiteEvents.size(); j++) { + SubEvent sub_event(0, 0, SubEvent::REQ_READ, false, true, + this_event->commPreRequisiteEvents[j]); + this_event->subEventList->push_back(sub_event); + } + } else { + // Computation Event + unsigned long max_loc_reads; + unsigned long max_loc_writes; + + if (this_event->compMemReads >= + this_event->compReadEvents.size()) { + max_loc_reads = this_event->compMemReads; + } else + max_loc_reads = this_event->compReadEvents.size(); + + if (this_event->compMemWrites >= + this_event->compWriteEvents.size()) { + max_loc_writes = this_event->compMemWrites; + } else + max_loc_writes = this_event->compWriteEvents.size(); + + unsigned long total_mem_ops = max_loc_reads + max_loc_writes; + // Create a single sub event if there are no memory ops. + if (total_mem_ops == 0) { + this_event->subEventList = new deque; + SubEvent sub_event(this_event->compIOPS, + this_event->compFLOPS, false, false); + this_event->subEventList->push_back(sub_event); + } else { + // Split up compute ops across the number of requests, i.e. + // the number of sub events. 
+ unsigned long IOPS_div = this_event->compIOPS / + total_mem_ops; + unsigned int IOPS_rem = this_event->compIOPS % + total_mem_ops; + unsigned long FLOPS_div = this_event->compFLOPS / + total_mem_ops; + unsigned int FLOPS_rem = this_event->compFLOPS % + total_mem_ops; + + unsigned long mem_reads_inserted = 0; + unsigned long mem_writes_inserted = 0; + + // Mark off memory accesses and distribute the IOPS + // and FLOPS evenly + this_event->subEventList = new deque; + + // Write first then read + LocalMemAccessType init_type = WRITE; + for (unsigned long j = 0; j < total_mem_ops; j++) { + switch(memTypeToInsert(mem_reads_inserted, + mem_writes_inserted, max_loc_reads, + max_loc_writes, init_type)) { + case READ: + { + SubEvent sub_event( + IOPS_div, FLOPS_div, + SubEvent::REQ_READ, false, true, + this_event-> + compReadEvents[mem_reads_inserted % + this_event->compReadEvents.size()]); + this_event->subEventList->push_back(sub_event); + mem_reads_inserted++; + break; + } + case WRITE: + { + SubEvent sub_event( + IOPS_div, FLOPS_div, + SubEvent::REQ_WRITE, false, true, this_event-> + compWriteEvents[mem_writes_inserted % + this_event->compWriteEvents.size()]); + this_event->subEventList->push_back(sub_event); + mem_writes_inserted++; + break; + } + default: + panic("Invalid memory request type.\n"); + } + + // Flip read/write + init_type = (init_type == WRITE) ? 
READ : WRITE; + } + + // Distribute the residuals randomly + for (unsigned int j = 0; j < IOPS_rem; j++) { + (*(this_event->subEventList)) + [rand() % total_mem_ops].numIOPS++; + } + + for (unsigned int j = 0; j < FLOPS_rem; j++) { + (*(this_event->subEventList)) + [rand() % total_mem_ops].numFLOPS++; + } + } + } + // Schedule the sub event + SubEvent *top_sub_event = &(this_event->subEventList->front()); + Tick comp_time = clockPeriod() * (Cycles(CPI_IOPS * + top_sub_event->numIOPS) + Cycles(CPI_FLOPS * + top_sub_event->numFLOPS)); + top_sub_event->triggerTime = comp_time + curTick() + clockPeriod(); + this_event->subEventsCreated = true; + } +} + +SynchroTrace::LocalMemAccessType +SynchroTrace::memTypeToInsert(unsigned long loc_reads, + unsigned long loc_writes, + unsigned long max_loc_reads, + unsigned long max_loc_writes, + LocalMemAccessType type) +{ + // Either the number of local reads or writes must not have reached the max + // allowed. + assert(loc_reads < max_loc_reads || loc_writes < max_loc_writes); + // Assert that the type is valid. + assert(type == READ || type == WRITE); + + if (type == READ && loc_reads < max_loc_reads) return READ; + if (loc_writes < max_loc_writes) + return WRITE; + else + return READ; +}