diff -r 43d22d746e7a -r 5985e0c89872 SConstruct --- a/SConstruct Mon Aug 19 03:52:36 2013 -0400 +++ b/SConstruct Mon Aug 19 19:06:26 2013 -0500 @@ -637,8 +637,9 @@ # shared_ptr), and the libc++ STL containers create problems in # combination with the current gem5 code. For now, we stick with # libstdc++ and use the TR1 namespace. - # if sys.platform == "darwin": - # main.Append(CXXFLAGS=['-stdlib=libc++']) + if sys.platform == "darwin": + main.Append(CXXFLAGS=['-stdlib=libc++']) + main.Append(LIBS=['c++']) else: print termcap.Yellow + termcap.Bold + 'Error' + termcap.Normal, diff -r 43d22d746e7a -r 5985e0c89872 src/SConscript --- a/src/SConscript Mon Aug 19 03:52:36 2013 -0400 +++ b/src/SConscript Mon Aug 19 19:06:26 2013 -0500 @@ -895,7 +895,8 @@ # Both gcc and clang have issues with unused labels and values in # the SWIG generated code - swig_env.Append(CCFLAGS=['-Wno-unused-label', '-Wno-unused-value']) + swig_env.Append(CCFLAGS=['-Wno-unused-label', '-Wno-unused-value', + '-Wno-sometimes-uninitialized']) # Add additional warnings here that should not be applied to # the SWIG generated code diff -r 43d22d746e7a -r 5985e0c89872 src/base/barrier.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/base/barrier.hh Mon Aug 19 19:06:26 2013 -0500 @@ -0,0 +1,43 @@ +#ifndef __BASE_BARRIER_HH__ +#define __BASE_BARRIER_HH__ + +#include + +class Barrier +{ + private: + /// Mutex to protect access to numLeft and generation + std::mutex bMutex; + /// Condition variable for waiting on barrier + std::condition_variable bCond; + /// Number of threads we should be waiting for before completing the barrier + unsigned numWaiting; + /// Generation of this barrier + unsigned generation; + /// Number of threads remaining for the current generation + unsigned numLeft; + + public: + Barrier(unsigned _numWaiting) + : numWaiting(_numWaiting), generation(0), numLeft(_numWaiting) + {} + + bool + wait() + { + std::unique_lock lock(bMutex); + unsigned int gen = generation; + + if 
(--numLeft == 0) { + generation++; + numLeft = numWaiting; + bCond.notify_all(); + return true; + } + while (gen == generation) + bCond.wait(lock); + return false; + } +}; + +#endif // __BASE_BARRIER_HH__ diff -r 43d22d746e7a -r 5985e0c89872 src/cpu/base.cc --- a/src/cpu/base.cc Mon Aug 19 03:52:36 2013 -0400 +++ b/src/cpu/base.cc Mon Aug 19 19:06:26 2013 -0500 @@ -569,7 +569,7 @@ BaseCPU::scheduleInstStop(ThreadID tid, Counter insts, const char *cause) { const Tick now(comInstEventQueue[tid]->getCurTick()); - Event *event(new SimLoopExitEvent(cause, 0)); + Event *event(new LocalSimLoopExitEvent(cause, 0)); comInstEventQueue[tid]->schedule(event, now + insts); } @@ -578,7 +578,7 @@ BaseCPU::scheduleLoadStop(ThreadID tid, Counter loads, const char *cause) { const Tick now(comLoadEventQueue[tid]->getCurTick()); - Event *event(new SimLoopExitEvent(cause, 0)); + Event *event(new LocalSimLoopExitEvent(cause, 0)); comLoadEventQueue[tid]->schedule(event, now + loads); } diff -r 43d22d746e7a -r 5985e0c89872 src/dev/etherlink.cc --- a/src/dev/etherlink.cc Mon Aug 19 03:52:36 2013 -0400 +++ b/src/dev/etherlink.cc Mon Aug 19 19:06:26 2013 -0500 @@ -142,7 +142,9 @@ void process(); virtual void serialize(ostream &os); - virtual void unserialize(Checkpoint *cp, const string §ion); + void unserialize(Checkpoint *cp, const string §ion) {} + void unserialize(Checkpoint *cp, const string §ion, + EventQueue *eventq); static Serializable *createForUnserialize(Checkpoint *cp, const string §ion); }; @@ -259,9 +261,10 @@ void -LinkDelayEvent::unserialize(Checkpoint *cp, const string §ion) +LinkDelayEvent::unserialize(Checkpoint *cp, const string §ion, + EventQueue *eventq) { - Event::unserialize(cp, section); + Event::unserialize(cp, section, eventq); EtherLink *parent; bool number; diff -r 43d22d746e7a -r 5985e0c89872 src/python/m5/SimObject.py --- a/src/python/m5/SimObject.py Mon Aug 19 03:52:36 2013 -0400 +++ b/src/python/m5/SimObject.py Mon Aug 19 19:06:26 2013 -0500 @@ -528,8 +528,6 
@@ #endif #include - -class EventQueue; ''') for param in params: param.cxx_predecls(code) @@ -558,16 +556,11 @@ code.indent() if cls == SimObject: code(''' - SimObjectParams() - { - extern EventQueue mainEventQueue; - eventq = &mainEventQueue; - } + SimObjectParams() {} virtual ~SimObjectParams() {} std::string name; PyObject *pyobj; - EventQueue *eventq; ''') for param in params: param.cxx_decl(code) @@ -582,6 +575,14 @@ return code +# This *temporary* definition is required to support calls from the +# SimObject class definition to the MetaSimObject methods (in +# particular _set_param, which gets called for parameters with default +# values defined on the SimObject class itself). It will get +# overridden by the permanent definition (which requires that +# SimObject be defined) lower in this file. +def isSimObjectOrVector(value): + return False # The SimObject class is the root of the special hierarchy. Most of # the code in this class deals with the configuration hierarchy itself @@ -592,8 +593,9 @@ __metaclass__ = MetaSimObject type = 'SimObject' abstract = True + eventq_index = Param.UInt32(Parent.eventq_index, "Event Queue Index") + cxx_header = "sim/sim_object.hh" - cxx_bases = [ "Drainable", "Serializable" ] @classmethod diff -r 43d22d746e7a -r 5985e0c89872 src/python/m5/event.py --- a/src/python/m5/event.py Mon Aug 19 03:52:36 2013 -0400 +++ b/src/python/m5/event.py Mon Aug 19 19:06:26 2013 -0500 @@ -29,9 +29,9 @@ import m5 import internal.event -from internal.event import PythonEvent, SimLoopExitEvent as SimExit +from internal.event import PythonEvent, GlobalSimLoopExitEvent as SimExit -mainq = internal.event.cvar.mainEventQueue +mainq = None def create(obj, priority=None): if priority is None: @@ -58,4 +58,10 @@ print "Progress! 
Time now %fs" % (m5.curTick()/1e12) self.eventq.schedule(self, m5.curTick() + self.period) +def getEventQueue(index): + return internal.event.getEventQueue(index) + +def setEventQueue(eventq): + internal.event.curEventQueue(eventq) + __all__ = [ 'create', 'Event', 'ProgressEvent', 'SimExit', 'mainq' ] diff -r 43d22d746e7a -r 5985e0c89872 src/python/m5/main.py --- a/src/python/m5/main.py Mon Aug 19 03:52:36 2013 -0400 +++ b/src/python/m5/main.py Mon Aug 19 19:06:26 2013 -0500 @@ -193,6 +193,10 @@ fatal("Tracing is not enabled. Compile with TRACING_ON") + # Set the main event queue for the main thread. + event.mainq = event.getEventQueue(0) + event.setEventQueue(event.mainq) + if not os.path.isdir(options.outdir): os.makedirs(options.outdir) diff -r 43d22d746e7a -r 5985e0c89872 src/python/m5/simulate.py --- a/src/python/m5/simulate.py Mon Aug 19 03:52:36 2013 -0400 +++ b/src/python/m5/simulate.py Mon Aug 19 19:06:26 2013 -0500 @@ -139,6 +139,7 @@ need_resume = [] need_startup = True + def simulate(*args, **kwargs): global need_resume, need_startup @@ -147,6 +148,13 @@ for obj in root.descendants(): obj.startup() need_startup = False + # Python exit handlers happen in reverse order. + # We want to dump stats last. + atexit.register(stats.dump) + + # register our C++ exit callback function with Python + atexit.register(internal.core.doExitCleanup) + for root in need_resume: resume(root) need_resume = [] @@ -157,12 +165,6 @@ def curTick(): return internal.core.curTick() -# Python exit handlers happen in reverse order. We want to dump stats last. -atexit.register(stats.dump) - -# register our C++ exit callback function with Python -atexit.register(internal.core.doExitCleanup) - # Drain the system in preparation of a checkpoint or memory mode # switch. 
def drain(root): diff -r 43d22d746e7a -r 5985e0c89872 src/python/swig/core.i --- a/src/python/swig/core.i Mon Aug 19 03:52:36 2013 -0400 +++ b/src/python/swig/core.i Mon Aug 19 19:06:26 2013 -0500 @@ -92,4 +92,3 @@ bool want_warn, warn_verbose; bool want_info, info_verbose; bool want_hack, hack_verbose; - diff -r 43d22d746e7a -r 5985e0c89872 src/python/swig/event.i --- a/src/python/swig/event.i Mon Aug 19 03:52:36 2013 -0400 +++ b/src/python/swig/event.i Mon Aug 19 19:06:26 2013 -0500 @@ -82,12 +82,13 @@ %include "python/swig/pyevent.hh" // minimal definition of SimExitEvent interface to wrap -class SimLoopExitEvent : public Event +class GlobalSimLoopExitEvent { public: std::string getCause(); int getCode(); - SimLoopExitEvent(const std::string &_cause, int c, Tick _repeat = 0); + GlobalSimLoopExitEvent(Tick when, const std::string &_cause, int c, + Tick _repeat = 0); }; %exception simulate { @@ -96,5 +97,9 @@ return NULL; } } -SimLoopExitEvent *simulate(Tick num_cycles = MaxTick); + +GlobalSimLoopExitEvent *simulate(Tick num_cycles = MaxTick); void exitSimLoop(const std::string &message, int exit_code); + +void curEventQueue( EventQueue *); +EventQueue *getEventQueue(uint32_t index); diff -r 43d22d746e7a -r 5985e0c89872 src/sim/Root.py --- a/src/sim/Root.py Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/Root.py Mon Aug 19 19:06:26 2013 -0500 @@ -60,6 +60,14 @@ type = 'Root' cxx_header = "sim/root.hh" + # By default, root sim object and hence all other sim objects schedule + # event on the eventq with index 0. + eventq_index = 0 + + # Simulation Quantum for multiple main event queue simulation. + # Needs to be set explicitly for a multi-eventq simulation. + sim_quantum = Param.Tick(0, "simulation quantum") + full_system = Param.Bool("if this is a full system simulation") # Time syncing prevents the simulation from running faster than real time. 
diff -r 43d22d746e7a -r 5985e0c89872 src/sim/SConscript --- a/src/sim/SConscript Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/SConscript Mon Aug 19 19:06:26 2013 -0500 @@ -42,6 +42,7 @@ Source('core.cc') Source('debug.cc') Source('eventq.cc') +Source('global_event.cc') Source('init.cc') Source('main.cc', main=True, skip_lib=True) Source('root.cc') diff -r 43d22d746e7a -r 5985e0c89872 src/sim/core.hh --- a/src/sim/core.hh Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/core.hh Mon Aug 19 19:06:26 2013 -0500 @@ -42,7 +42,7 @@ #include "sim/eventq.hh" /// The universal simulation clock. -inline Tick curTick() { return mainEventQueue.getCurTick(); } +inline Tick curTick() { return _tls_mainEventQueue->getCurTick(); } const Tick retryTime = 1000; diff -r 43d22d746e7a -r 5985e0c89872 src/sim/debug.cc --- a/src/sim/debug.cc Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/debug.cc Mon Aug 19 19:06:26 2013 -0500 @@ -37,6 +37,7 @@ #include "base/debug.hh" #include "sim/debug.hh" #include "sim/eventq_impl.hh" +#include "sim/global_event.hh" #include "sim/sim_events.hh" #include "sim/sim_exit.hh" @@ -46,9 +47,9 @@ // Debug event: place a breakpoint on the process function and // schedule the event to break at a particular cycle // -struct DebugBreakEvent : public Event +struct DebugBreakEvent : public GlobalEvent { - DebugBreakEvent(); + DebugBreakEvent(Tick when); void process(); // process event virtual const char *description() const; }; @@ -56,8 +57,8 @@ // // constructor: schedule at specified time // -DebugBreakEvent::DebugBreakEvent() - : Event(Debug_Break_Pri, AutoDelete) +DebugBreakEvent::DebugBreakEvent(Tick when) + : GlobalEvent(when, Debug_Break_Pri, AutoDelete) { } @@ -84,7 +85,7 @@ void schedBreakCycle(Tick when) { - mainEventQueue.schedule(new DebugBreakEvent, when); + new DebugBreakEvent(when); warn("need to stop all queues"); } @@ -102,8 +103,9 @@ void eventqDump() { - mainEventQueue.dump(); - warn("need to dump all queues"); + for (uint32_t i = 0; i < 
numMainEventQueues; ++i) { + mainEventQueue[i]->dump(); + } } void diff -r 43d22d746e7a -r 5985e0c89872 src/sim/eventq.hh --- a/src/sim/eventq.hh Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/eventq.hh Mon Aug 19 19:06:26 2013 -0500 @@ -1,5 +1,6 @@ /* * Copyright (c) 2000-2005 The Regents of The University of Michigan + * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -40,6 +41,7 @@ #include #include #include +#include #include #include "base/flags.hh" @@ -49,20 +51,48 @@ #include "sim/serialize.hh" class EventQueue; // forward declaration +class BaseGlobalEvent; -extern EventQueue mainEventQueue; +//! Simulation Quantum for multiple eventq simulation. +extern Tick simQuantum; -/* - * An item on an event queue. The action caused by a given - * event is specified by deriving a subclass and overriding the - * process() member function. - * - * Caution, the order of members is chosen to maximize data packing. +//! Current number of allocated main event queues. +extern uint32_t numMainEventQueues; + +//! Array for main event queues. +extern std::vector mainEventQueue; + +#ifndef SWIG +//! The current event queue for the running thread. Access to this queue +//! does not require any locking from the thread. + +#ifdef __linux__ +extern __thread EventQueue *_tls_mainEventQueue; +#else +extern EventQueue *_tls_mainEventQueue; +#endif + +#endif + +//! Current mode of execution: parallel / serial +extern bool inParallelMode; + +//! Function for returning eventq queue for the provided +//! index. The function allocates a new queue in case one +//! does not exist for the index, provided that the index +//! is within bounds. 
+EventQueue *getEventQueue(uint32_t index); + +inline EventQueue *curEventQueue() { return _tls_mainEventQueue; } +inline void curEventQueue(EventQueue *q) { _tls_mainEventQueue = q; } + +/** + * Common base class for Event and GlobalEvent, so they can share flag + * and priority definitions and accessor functions. This class should + * not be used directly. */ -class Event : public Serializable +class EventBase { - friend class EventQueue; - protected: typedef unsigned short FlagsType; typedef ::Flags Flags; @@ -78,107 +108,9 @@ static const FlagsType Initialized = 0x7a40; // somewhat random bits static const FlagsType InitMask = 0xffc0; // mask for init bits - bool - initialized() const - { - return this && (flags & InitMask) == Initialized; - } - public: typedef int8_t Priority; - private: - // The event queue is now a linked list of linked lists. The - // 'nextBin' pointer is to find the bin, where a bin is defined as - // when+priority. All events in the same bin will be stored in a - // second linked list (a stack) maintained by the 'nextInBin' - // pointer. The list will be accessed in LIFO order. The end - // result is that the insert/removal in 'nextBin' is - // linear/constant, and the lookup/removal in 'nextInBin' is - // constant/constant. Hopefully this is a significant improvement - // over the current fully linear insertion. - Event *nextBin; - Event *nextInBin; - - static Event *insertBefore(Event *event, Event *curr); - static Event *removeItem(Event *event, Event *last); - - Tick _when; //!< timestamp when event should be processed - Priority _priority; //!< event priority - Flags flags; - -#ifndef NDEBUG - /// Global counter to generate unique IDs for Event instances - static Counter instanceCounter; - - /// This event's unique ID. We can also use pointer values for - /// this but they're not consistent across runs making debugging - /// more difficult. Thus we use a global counter value when - /// debugging. 
- Counter instance; - - /// queue to which this event belongs (though it may or may not be - /// scheduled on this queue yet) - EventQueue *queue; -#endif - -#ifdef EVENTQ_DEBUG - Tick whenCreated; //!< time created - Tick whenScheduled; //!< time scheduled -#endif - - void - setWhen(Tick when, EventQueue *q) - { - _when = when; -#ifndef NDEBUG - queue = q; -#endif -#ifdef EVENTQ_DEBUG - whenScheduled = curTick(); -#endif - } - - protected: - /// Accessor for flags. - Flags - getFlags() const - { - return flags & PublicRead; - } - - bool - isFlagSet(Flags _flags) const - { - assert(_flags.noneSet(~PublicRead)); - return flags.isSet(_flags); - } - - /// Accessor for flags. - void - setFlags(Flags _flags) - { - assert(_flags.noneSet(~PublicWrite)); - flags.set(_flags); - } - - void - clearFlags(Flags _flags) - { - assert(_flags.noneSet(~PublicWrite)); - flags.clear(_flags); - } - - void - clearFlags() - { - flags.clear(PublicWrite); - } - - // This function isn't really useful if TRACING_ON is not defined - virtual void trace(const char *action); //!< trace event activity - - public: /// Event priorities, to provide tie-breakers for events scheduled /// at the same cycle. Most events are scheduled at the default /// priority; these values are used to control events that need to @@ -233,14 +165,124 @@ /// Maximum priority static const Priority Maximum_Pri = SCHAR_MAX; +}; + +/* + * An item on an event queue. The action caused by a given + * event is specified by deriving a subclass and overriding the + * process() member function. + * + * Caution, the order of members is chosen to maximize data packing. + */ +class Event : public EventBase, public Serializable +{ + friend class EventQueue; + + private: + // The event queue is now a linked list of linked lists. The + // 'nextBin' pointer is to find the bin, where a bin is defined as + // when+priority. 
All events in the same bin will be stored in a + // second linked list (a stack) maintained by the 'nextInBin' + // pointer. The list will be accessed in LIFO order. The end + // result is that the insert/removal in 'nextBin' is + // linear/constant, and the lookup/removal in 'nextInBin' is + // constant/constant. Hopefully this is a significant improvement + // over the current fully linear insertion. + Event *nextBin; + Event *nextInBin; + + static Event *insertBefore(Event *event, Event *curr); + static Event *removeItem(Event *event, Event *last); + + Tick _when; //!< timestamp when event should be processed + Priority _priority; //!< event priority + Flags flags; + +#ifndef NDEBUG + /// Global counter to generate unique IDs for Event instances + static Counter instanceCounter; + + /// This event's unique ID. We can also use pointer values for + /// this but they're not consistent across runs making debugging + /// more difficult. Thus we use a global counter value when + /// debugging. + Counter instance; + + /// queue to which this event belongs (though it may or may not be + /// scheduled on this queue yet) + EventQueue *queue; +#endif + +#ifdef EVENTQ_DEBUG + Tick whenCreated; //!< time created + Tick whenScheduled; //!< time scheduled +#endif + + void + setWhen(Tick when, EventQueue *q) + { + _when = when; +#ifndef NDEBUG + queue = q; +#endif +#ifdef EVENTQ_DEBUG + whenScheduled = curTick(); +#endif + } + + bool + initialized() const + { + return this && (flags & InitMask) == Initialized; + } + + protected: + /// Accessor for flags. + Flags + getFlags() const + { + return flags & PublicRead; + } + + bool + isFlagSet(Flags _flags) const + { + assert(_flags.noneSet(~PublicRead)); + return flags.isSet(_flags); + } + + /// Accessor for flags. 
+ void + setFlags(Flags _flags) + { + assert(_flags.noneSet(~PublicWrite)); + flags.set(_flags); + } + + void + clearFlags(Flags _flags) + { + assert(_flags.noneSet(~PublicWrite)); + flags.clear(_flags); + } + + void + clearFlags() + { + flags.clear(PublicWrite); + } + + // This function isn't really useful if TRACING_ON is not defined + virtual void trace(const char *action); //!< trace event activity + + public: /* * Event constructor * @param queue that the event gets scheduled on */ Event(Priority p = Default_Pri, Flags f = 0) - : nextBin(NULL), nextInBin(NULL), _priority(p), - flags(Initialized | f) + : nextBin(NULL), nextInBin(NULL), _priority(p), flags(Initialized | f) { assert(f.noneSet(~PublicWrite)); #ifndef NDEBUG @@ -295,9 +337,16 @@ /// Get the event priority Priority priority() const { return _priority; } + //! If this is part of a GlobalEvent, return the pointer to the + //! Global Event. By default, there is no GlobalEvent, so return + //! NULL. (Overridden in GlobalEvent::BarrierEvent.) 
+ virtual BaseGlobalEvent *globalEvent() { return NULL; } + #ifndef SWIG virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); + void unserialize(Checkpoint *cp, const std::string §ion, + EventQueue *eventq); #endif }; @@ -305,27 +354,27 @@ inline bool operator<(const Event &l, const Event &r) { - return l.when() < r.when() || + return (l.when() < r.when()) || (l.when() == r.when() && l.priority() < r.priority()); } inline bool operator>(const Event &l, const Event &r) { - return l.when() > r.when() || + return (l.when() > r.when()) || (l.when() == r.when() && l.priority() > r.priority()); } inline bool operator<=(const Event &l, const Event &r) { - return l.when() < r.when() || + return (l.when() < r.when()) || (l.when() == r.when() && l.priority() <= r.priority()); } inline bool operator>=(const Event &l, const Event &r) { - return l.when() > r.when() || + return (l.when() > r.when()) || (l.when() == r.when() && l.priority() >= r.priority()); } @@ -352,20 +401,42 @@ Event *head; Tick _curTick; + //! Mutex to protect async queue. + std::mutex *async_queue_mutex; + + //! List of events added by other threads to this event queue. + std::list async_queue; + + //! Insert / remove event from the queue. Should only be called + //! by thread operating this queue. void insert(Event *event); void remove(Event *event); + //! Function for adding events to the async queue. The added events + //! are added to main event queue later. Threads, other than the + //! owning thread, should call this function instead of insert(). 
+ void asyncInsert(Event *event); + EventQueue(const EventQueue &); - const EventQueue &operator=(const EventQueue &); public: EventQueue(const std::string &n); + //! Default constructor: creates an unnamed queue. The members are + //! initialized here directly; calling EventQueue("") in the body + //! would only build and discard a temporary object. + EventQueue() + : objName(""), head(NULL), _curTick(0), + async_queue_mutex(new std::mutex()) + {} virtual const std::string name() const { return objName; } + void name(const std::string &st) { objName = st; } + void initAsyncLock() { async_queue_mutex = new std::mutex(); } - // schedule the given event on this queue - void schedule(Event *event, Tick when); + //! Schedule the given event on this queue. Safe to call from any + //! thread. + void schedule(Event *event, Tick when, bool global = false); + + //! Deschedule the specified event. Should be called only from the + //! owning thread. void deschedule(Event *event); + + //! Reschedule the specified event. Should be called only from + //! the owning thread. void reschedule(Event *event, Tick when, bool always = false); Tick nextTick() const { return head->when(); } @@ -402,6 +473,9 @@ bool debugVerify() const; + //! Function for moving events from the async_queue to the main queue. + void handleAsyncInsertions(); + /** * function for replacing the head of the event queue, so that a * different set of events can run without disturbing events that have diff -r 43d22d746e7a -r 5985e0c89872 src/sim/eventq.cc --- a/src/sim/eventq.cc Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/eventq.cc Mon Aug 19 19:06:26 2013 -0500 @@ -1,6 +1,7 @@ /* * Copyright (c) 2000-2005 The Regents of The University of Michigan * Copyright (c) 2008 The Hewlett-Packard Development Company + * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -46,13 +47,43 @@ using namespace std; +Tick simQuantum = 0; + // -// Main Event Queue +// Main Event Queues // -// Events on this queue are processed at the *beginning* of each +// Events on these queues are processed at the *beginning* of each // cycle, before the pipeline simulation is performed. 
// -EventQueue mainEventQueue("Main Event Queue"); +uint32_t numMainEventQueues = 0; + +vector mainEventQueue; + +#ifdef __linux__ +__thread EventQueue *_tls_mainEventQueue = NULL; +#else +EventQueue *_tls_mainEventQueue = NULL; +#endif + +bool inParallelMode = false; + +EventQueue * +getEventQueue(uint32_t index) +{ +#ifndef __linux__ + if (index != 0) { + fatal("Multi queue simulation allowed only if TLS is available!"); + } +#endif + + while (numMainEventQueues <= index) { + numMainEventQueues++; + mainEventQueue.push_back( + new EventQueue(csprintf("MainEventQueue-%d", index))); + } + + return mainEventQueue[index]; +} #ifndef NDEBUG Counter Event::instanceCounter = 0; @@ -156,6 +187,8 @@ if (head == NULL) panic("event not found!"); + assert(event->queue == this); + // deal with an event on the head's 'in bin' list (event has the same // time as the head) if (*head == *event) { @@ -232,8 +265,13 @@ void Event::unserialize(Checkpoint *cp, const string §ion) { +} + +void +Event::unserialize(Checkpoint *cp, const string §ion, EventQueue *eventq) +{ if (scheduled()) - mainEventQueue.deschedule(this); + eventq->deschedule(this); UNSERIALIZE_SCALAR(_when); UNSERIALIZE_SCALAR(_priority); @@ -259,7 +297,7 @@ if (wasScheduled) { DPRINTF(Config, "rescheduling at %d\n", _when); - mainEventQueue.schedule(this, _when); + eventq->schedule(this, _when); } } @@ -388,7 +426,9 @@ void dumpMainQueue() { - mainEventQueue.dump(); + for (uint32_t i = 0; i < numMainEventQueues; ++i) { + mainEventQueue[i]->dump(); + } } @@ -433,4 +473,28 @@ EventQueue::EventQueue(const string &n) : objName(n), head(NULL), _curTick(0) -{} +{ + async_queue_mutex = new std::mutex(); +} + +void +EventQueue::asyncInsert(Event *event) +{ + async_queue_mutex->lock(); + async_queue.push_back(event); + async_queue_mutex->unlock(); +} + +void +EventQueue::handleAsyncInsertions() +{ + assert(this == curEventQueue()); + async_queue_mutex->lock(); + + while (!async_queue.empty()) { + insert(async_queue.front()); + 
async_queue.pop_front(); + } + + async_queue_mutex->unlock(); +} diff -r 43d22d746e7a -r 5985e0c89872 src/sim/eventq_impl.hh --- a/src/sim/eventq_impl.hh Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/eventq_impl.hh Mon Aug 19 19:06:26 2013 -0500 @@ -38,19 +38,26 @@ #include "sim/eventq.hh" inline void -EventQueue::schedule(Event *event, Tick when) +EventQueue::schedule(Event *event, Tick when, bool global) { assert(when >= getCurTick()); assert(!event->scheduled()); assert(event->initialized()); event->setWhen(when, this); - insert(event); + + // The check below is to make sure of two things + // a. a thread schedules local events on other queues through the asyncq + // b. a thread schedules global events on the asyncq, whether or not + // this event belongs to this eventq. This is required to maintain + // a total order amongst the global events. See global_event.{cc,hh} + // for more explanation. + if ((this != curEventQueue() || global) && inParallelMode) { + asyncInsert(event); + } else { + insert(event); + } event->flags.set(Event::Scheduled); - if (this == &mainEventQueue) - event->flags.set(Event::IsMainQueue); - else - event->flags.clear(Event::IsMainQueue); if (DTRACE(Event)) event->trace("scheduled"); @@ -61,6 +68,7 @@ { assert(event->scheduled()); assert(event->initialized()); + assert(this == curEventQueue() || !inParallelMode); remove(event); @@ -80,6 +88,7 @@ assert(when >= getCurTick()); assert(always || event->scheduled()); assert(event->initialized()); + assert(this == curEventQueue() || !inParallelMode); if (event->scheduled()) remove(event); @@ -88,10 +97,6 @@ insert(event); event->flags.clear(Event::Squashed); event->flags.set(Event::Scheduled); - if (this == &mainEventQueue) - event->flags.set(Event::IsMainQueue); - else - event->flags.clear(Event::IsMainQueue); if (DTRACE(Event)) event->trace("rescheduled"); diff -r 43d22d746e7a -r 5985e0c89872 src/sim/global_event.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/sim/global_event.hh 
Mon Aug 19 19:06:26 2013 -0500 @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2011 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Steve Reinhardt + */ + +#ifndef __SIM_GLOBAL_EVENT_HH__ +#define __SIM_GLOBAL_EVENT_HH__ + +#include + +#include +#include + +#include "base/barrier.hh" +#include "sim/eventq_impl.hh" + +/** + * @file sim/global_event.hh + * Global events and related declarations. 
+ * + * A global event is an event that occurs across all threads, i.e., + * globally. It consists of a set of "local" (regular) Events, one + * per thread/event queue, a barrier object, and common state. The + * local events are scheduled for the same tick. The local event + * process() method enters the barrier to wait for other threads; once + * all threads reach that tick (and enter the associated barrier), the + * global event is triggered and its associated activity is performed. + * + * There are two basic global event patterns, GlobalEvent and + * GlobalSyncEvent. GlobalEvent is the base class for typical global + * events, while GlobalSyncEvent is optimized for global + * synchronization operations. + */ + +/** + * Common base class for GlobalEvent and GlobalSyncEvent. + */ +class BaseGlobalEvent : public EventBase +{ + private: + //! Mutex variable for providing exclusive right to schedule global + //! events. This is necessary so that a total order can be maintained + //! amongst the global events. Without ensuring the total order, it is + //! possible that threads execute global events in different orders, + //! which can result in a deadlock. + static std::mutex globalQMutex; + + protected: + + /// The base class for the local events that will synchronize + /// threads to perform the global event. This class is abstract, + /// since it derives from the abstract Event class but still does + /// not define the required process() method. + class BarrierEvent : public Event + { + protected: + BaseGlobalEvent *_globalEvent; + + BarrierEvent(BaseGlobalEvent *global_event, Priority p, Flags f) + : Event(p, f), _globalEvent(global_event) + { + } + + ~BarrierEvent(); + + friend class BaseGlobalEvent; + + bool globalBarrier() + { + return _globalEvent->barrier->wait(); + } + + public: + virtual BaseGlobalEvent *globalEvent() { return _globalEvent; } + }; + + //! The barrier that all threads wait on before performing the + //! global event. 
+ Barrier *barrier; + + //! The individual local event instances (one per thread/event queue). + std::vector barrierEvent; + + public: + BaseGlobalEvent(Priority p, Flags f); + + virtual ~BaseGlobalEvent(); + + virtual void process() = 0; + + virtual const char *description() const = 0; + + void schedule(Tick when); + + bool scheduled() const + { + bool sched = false; + for (uint32_t i = 0; i < numMainEventQueues; ++i) { + sched = sched or barrierEvent[i]->scheduled(); + } + + return sched; + } + + Tick when() const + { + assert(numMainEventQueues > 0); + return barrierEvent[0]->when(); + } + + void deschedule(); + void reschedule(Tick when); +}; + + +/** + * Funky intermediate class to support CRTP so that we can have a + * common constructor to create the local events, even though the + * types of the local events are defined in the derived classes. + */ +template +class BaseGlobalEventTemplate : public BaseGlobalEvent +{ + protected: + BaseGlobalEventTemplate(Priority p, Flags f) + : BaseGlobalEvent(p, f) + { + for (int i = 0; i < numMainEventQueues; ++i) + barrierEvent[i] = new typename Derived::BarrierEvent(this, p, f); + } +}; + + +/** + * The main global event class. Ordinary global events should derive + * from this class, and define process() to specify the action to be + * taken when the event is reached. All threads will synchronize at a + * barrier, exactly one of the threads will execute the process() + * method, then the threads will synchronize again so that none of + * them continue until process() is complete. 
+ */ +class GlobalEvent : public BaseGlobalEventTemplate<GlobalEvent> +{ + public: + typedef BaseGlobalEventTemplate<GlobalEvent> Base; + + class BarrierEvent : public Base::BarrierEvent + { + public: + void process(); + BarrierEvent(Base *global_event, Priority p, Flags f) + : Base::BarrierEvent(global_event, p, f) + { } + }; + + GlobalEvent(Priority p, Flags f) + : Base(p, f) + { } + + GlobalEvent(Tick when, Priority p, Flags f) + : Base(p, f) + { + schedule(when); + } + + virtual void process() = 0; +}; + +/** + * A special global event that synchronizes all threads and forces + * them to process asynchronously enqueued events. Useful for + * separating quanta in a quantum-based parallel simulation. + */ +class GlobalSyncEvent : public BaseGlobalEventTemplate<GlobalSyncEvent> +{ + public: + typedef BaseGlobalEventTemplate<GlobalSyncEvent> Base; + + class BarrierEvent : public Base::BarrierEvent + { + public: + void process(); + BarrierEvent(Base *global_event, Priority p, Flags f) + : Base::BarrierEvent(global_event, p, f) + { } + }; + + GlobalSyncEvent(Priority p, Flags f) + : Base(p, f), repeat(0) + { } + + GlobalSyncEvent(Tick when, Tick _repeat, Priority p, Flags f) + : Base(p, f), repeat(_repeat) + { + schedule(when); + } + + void process(); + + const char *description() const; + + Tick repeat; +}; + + +#endif // __SIM_GLOBAL_EVENT_HH__ diff -r 43d22d746e7a -r 5985e0c89872 src/sim/global_event.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/sim/global_event.cc Mon Aug 19 19:06:26 2013 -0500 @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2011 Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Steve Reinhardt + */ + +#include "sim/global_event.hh" + +std::mutex BaseGlobalEvent::globalQMutex; + +BaseGlobalEvent::BaseGlobalEvent(Priority p, Flags f) +{ + barrierEvent.resize(numMainEventQueues); + barrier = new Barrier(numMainEventQueues); +} + + +BaseGlobalEvent::~BaseGlobalEvent() +{ + // see GlobalEvent::BarrierEvent::~BarrierEvent() comments + if (barrierEvent[0] != NULL) { + for (int i = 0; i < numMainEventQueues; ++i) + delete barrierEvent[i]; + } +} + + +void BaseGlobalEvent::schedule(Tick when) +{ + // This function is scheduling a global event, which actually is a + // set of local events, one event on each eventq. Global events need + // to have a total order. A thread cannot start executing events that + // follow a global event till all other threads have executed that global + // event as well. If global events were not in a total order, a deadlock + // would occur for there will be two threads who would be waiting for + // each other to execute the global events they themselves have executed. + // + // To ensure this total order, we do two things. + // First, before scheduling any global event, a thread needs to acquire + // the lock globalQMutex. This ensures that only one thread can schedule + // global events at any given time. + // Second, the local events corresponding to a global event are always + // first inserted in to the asyncq, irrespective of whether or not the + // thread scheduling the event owns the eventq on which the event is + // being scheduled. Thus global events have the same order in the asyncq + // of each thread. When they are inserted in the actual eventq, the + // comparators in the Event class ensure that the total order is + // maintained. 
+ + globalQMutex.lock(); + + for (int i = 0; i < numMainEventQueues; ++i) { + mainEventQueue[i]->schedule(barrierEvent[i], when, true); + } + + globalQMutex.unlock(); +} + +void BaseGlobalEvent::deschedule() +{ + EventQueue *q = curEventQueue(); + for (uint32_t i = 0; i < numMainEventQueues; ++i) { + if (barrierEvent[i]->scheduled()) { + curEventQueue(mainEventQueue[i]); + mainEventQueue[i]->deschedule(barrierEvent[i]); + } + } + + curEventQueue(q); +} + +void BaseGlobalEvent::reschedule(Tick when) +{ + // Read the comment in the schedule() function above. + globalQMutex.lock(); + + for (uint32_t i = 0; i < numMainEventQueues; ++i) { + if (barrierEvent[i]->scheduled()) + mainEventQueue[i]->reschedule(barrierEvent[i], when); + else + mainEventQueue[i]->schedule(barrierEvent[i], when, true); + } + + globalQMutex.unlock(); +} + +BaseGlobalEvent::BarrierEvent::~BarrierEvent() +{ + // if AutoDelete is set, local events will get deleted in event + // loop, but we need to delete GlobalEvent object too... 
so let + // the local event in slot 0 do it + if (isFlagSet(AutoDelete) && _globalEvent->barrierEvent[0] == this) { + // set backpointer to NULL so that global event knows not to + // turn around and recursively delete local events + _globalEvent->barrierEvent[0] = NULL; + delete _globalEvent; + } +} + + +void +GlobalEvent::BarrierEvent::process() +{ + // wait for all queues to arrive at barrier, then process event + if (globalBarrier()) { + _globalEvent->process(); + } + + // second barrier to force all queues to wait for event processing + // to finish before continuing + globalBarrier(); +} + + +void +GlobalSyncEvent::BarrierEvent::process() +{ + // wait for all queues to arrive at barrier, then process event + if (globalBarrier()) { + _globalEvent->process(); + } + + // second barrier to force all queues to wait for event processing + // to finish before continuing + globalBarrier(); + curEventQueue()->handleAsyncInsertions(); +} + +void +GlobalSyncEvent::process() +{ + if (repeat) { + schedule(curTick() + repeat); + } +} + +const char * +GlobalSyncEvent::description() const +{ + return "GlobalSyncEvent"; +} diff -r 43d22d746e7a -r 5985e0c89872 src/sim/root.cc --- a/src/sim/root.cc Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/root.cc Mon Aug 19 19:06:26 2013 -0500 @@ -112,6 +112,8 @@ assert(_root == NULL); _root = this; lastTime.setTimer(); + + simQuantum = p->sim_quantum; } void diff -r 43d22d746e7a -r 5985e0c89872 src/sim/serialize.hh --- a/src/sim/serialize.hh Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/serialize.hh Mon Aug 19 19:06:26 2013 -0500 @@ -49,6 +49,7 @@ class Serializable; class Checkpoint; class SimObject; +class EventQueue; /** The current version of the checkpoint format. 
* This should be incremented by 1 and only 1 for every new version, where a new diff -r 43d22d746e7a -r 5985e0c89872 src/sim/serialize.cc --- a/src/sim/serialize.cc Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/serialize.cc Mon Aug 19 19:06:26 2013 -0500 @@ -456,8 +456,12 @@ nameOut(os); paramOut(os, "curTick", curTick()); - nameOut(os, "MainEventQueue"); - mainEventQueue.serialize(os); + paramOut(os, "numMainEventQueues", numMainEventQueues); + + for (uint32_t i = 0; i < numMainEventQueues; ++i) { + nameOut(os, "MainEventQueue"); + mainEventQueue[i]->serialize(os); + } } void @@ -465,9 +469,12 @@ { Tick tick; paramIn(cp, section, "curTick", tick); - mainEventQueue.setCurTick(tick); + paramIn(cp, section, "numMainEventQueues", numMainEventQueues); - mainEventQueue.unserialize(cp, "MainEventQueue"); + for (uint32_t i = 0; i < numMainEventQueues; ++i) { + mainEventQueue[i]->setCurTick(tick); + mainEventQueue[i]->unserialize(cp, "MainEventQueue"); + } } Serializable::Serializable() diff -r 43d22d746e7a -r 5985e0c89872 src/sim/sim_events.hh --- a/src/sim/sim_events.hh Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/sim_events.hh Mon Aug 19 19:06:26 2013 -0500 @@ -31,12 +31,12 @@ #ifndef __SIM_SIM_EVENTS_HH__ #define __SIM_SIM_EVENTS_HH__ -#include "sim/eventq.hh" +#include "sim/global_event.hh" // // Event to terminate simulation at a particular cycle/instruction // -class SimLoopExitEvent : public Event +class GlobalSimLoopExitEvent : public GlobalEvent { protected: // string explaining why we're terminating @@ -45,7 +45,27 @@ Tick repeat; public: - SimLoopExitEvent(const std::string &_cause, int c, Tick repeat = 0); + GlobalSimLoopExitEvent(Tick when, const std::string &_cause, int c, + Tick repeat = 0); + + std::string getCause() { return cause; } + int getCode() { return code; } + + void process(); // process event + + virtual const char *description() const; +}; + +class LocalSimLoopExitEvent : public Event +{ + protected: + // string explaining why we're terminating 
+ std::string cause; + int code; + Tick repeat; + + public: + LocalSimLoopExitEvent(const std::string &_cause, int c, Tick repeat = 0); std::string getCause() { return cause; } int getCode() { return code; } diff -r 43d22d746e7a -r 5985e0c89872 src/sim/sim_events.cc --- a/src/sim/sim_events.cc Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/sim_events.cc Mon Aug 19 19:06:26 2013 -0500 @@ -39,48 +39,57 @@ using namespace std; -SimLoopExitEvent::SimLoopExitEvent(const std::string &_cause, int c, Tick r) - : Event(Sim_Exit_Pri, IsExitEvent), cause(_cause), code(c), repeat(r) +GlobalSimLoopExitEvent::GlobalSimLoopExitEvent(Tick when, + const std::string &_cause, + int c, Tick r) + : GlobalEvent(when, Sim_Exit_Pri, IsExitEvent), + cause(_cause), code(c), repeat(r) { } - // // handle termination event // void -SimLoopExitEvent::process() +GlobalSimLoopExitEvent::process() { - // if this got scheduled on a different queue (e.g. the committed - // instruction queue) then make a corresponding event on the main - // queue. - if (!isFlagSet(IsMainQueue)) { - exitSimLoop(cause, code); - setFlags(AutoDelete); - } - - // otherwise do nothing... 
the IsExitEvent flag takes care of - // exiting the simulation loop and returning this object to Python - - // but if you are doing this on intervals, don't forget to make another if (repeat) { - assert(isFlagSet(IsMainQueue)); - mainEventQueue.schedule(this, curTick() + repeat); + schedule(curTick() + repeat); } } const char * -SimLoopExitEvent::description() const +GlobalSimLoopExitEvent::description() const { - return "simulation loop exit"; + return "global simulation loop exit"; } void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat) { - Event *event = new SimLoopExitEvent(message, exit_code, repeat); - mainEventQueue.schedule(event, when); + new GlobalSimLoopExitEvent(when + simQuantum, message, exit_code, repeat); +} + +LocalSimLoopExitEvent::LocalSimLoopExitEvent(const std::string &_cause, + int _code, Tick _repeat) + : Event(Sim_Exit_Pri), cause(_cause), code(_code), repeat(_repeat) +{ +} + +// +// handle termination event: hand the cause and exit code to a global exit +// +void +LocalSimLoopExitEvent::process() +{ + exitSimLoop(cause, code); +} + +const char * +LocalSimLoopExitEvent::description() const +{ + return "local simulation loop exit"; } // diff -r 43d22d746e7a -r 5985e0c89872 src/sim/sim_exit.hh --- a/src/sim/sim_exit.hh Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/sim_exit.hh Mon Aug 19 19:06:26 2013 -0500 @@ -39,8 +39,6 @@ // forward declaration class Callback; -class EventQueue; -class SimLoopExitEvent; /// Register a callback to be called when Python exits. Defined in /// sim/main.cc. 
diff -r 43d22d746e7a -r 5985e0c89872 src/sim/sim_object.cc --- a/src/sim/sim_object.cc Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/sim_object.cc Mon Aug 19 19:06:26 2013 -0500 @@ -60,7 +60,7 @@ // SimObject constructor: used to maintain static simObjectList // SimObject::SimObject(const Params *p) - : EventManager(p->eventq), _params(p) + : EventManager(getEventQueue(p->eventq_index)), _params(p) { #ifdef DEBUG doDebugBreak = false; diff -r 43d22d746e7a -r 5985e0c89872 src/sim/simulate.hh --- a/src/sim/simulate.hh Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/simulate.hh Mon Aug 19 19:06:26 2013 -0500 @@ -32,4 +32,4 @@ #include "base/types.hh" #include "sim/sim_events.hh" -SimLoopExitEvent *simulate(Tick num_cycles = MaxTick); +GlobalSimLoopExitEvent *simulate(Tick num_cycles = MaxTick); diff -r 43d22d746e7a -r 5985e0c89872 src/sim/simulate.cc --- a/src/sim/simulate.cc Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/simulate.cc Mon Aug 19 19:06:26 2013 -0500 @@ -1,5 +1,6 @@ /* * Copyright (c) 2006 The Regents of The University of Michigan + * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,6 +30,9 @@ * Steve Reinhardt */ +#include <mutex> +#include <thread> + #include "base/misc.hh" #include "base/pollevent.hh" #include "base/types.hh" @@ -39,14 +43,60 @@ #include "sim/simulate.hh" #include "sim/stat_control.hh" +//! Mutex for handling async events. +std::mutex asyncEventMutex; + +//! Global barrier for synchronizing threads entering/exiting the +//! simulation loop. +Barrier *threadBarrier; + +//! forward declaration +Event *doSimLoop(EventQueue *); + +/** + * The main function for all subordinate threads (i.e., all threads + * other than the main thread). These threads start by waiting on + * threadBarrier. Once all threads have arrived at threadBarrier, + * they enter the simulation loop concurrently. When they exit the + * loop, they return to waiting on threadBarrier. 
This process is + * repeated until the simulation terminates. + */ +static void +thread_loop(EventQueue *queue) +{ + while (true) { + threadBarrier->wait(); + doSimLoop(queue); + } +} + /** Simulate for num_cycles additional cycles. If num_cycles is -1 * (the default), do not limit simulation; some other event must * terminate the loop. Exported to Python via SWIG. * @return The SimLoopExitEvent that caused the loop to exit. */ -SimLoopExitEvent * +GlobalSimLoopExitEvent * simulate(Tick num_cycles) { + // The first time simulate() is called from the Python code, we need to + // create a thread for each of event queues referenced by the + // instantiated sim objects. + static bool threads_initialized = false; + static std::vector<std::thread *> threads; + + if (!threads_initialized) { + threadBarrier = new Barrier(numMainEventQueues); + + // the main thread (the one we're currently running on) + // handles queue 0, so we only need to allocate new threads + // for queues 1..N-1. We'll call these the "subordinate" threads. + for (uint32_t i = 1; i < numMainEventQueues; i++) { + threads.push_back(new std::thread(thread_loop, mainEventQueue[i])); + } + + threads_initialized = true; + } + inform("Entering event queue @ %d. 
Starting simulation...\n", curTick()); if (num_cycles < MaxTick - curTick()) @@ -54,38 +104,99 @@ else // counter would roll over or be set to MaxTick anyhow num_cycles = MaxTick; - Event *limit_event = - new SimLoopExitEvent("simulate() limit reached", 0); - mainEventQueue.schedule(limit_event, num_cycles); + GlobalEvent *limit_event = new GlobalSimLoopExitEvent(num_cycles, + "simulate() limit reached", 0, 0); + + GlobalSyncEvent *quantum_event = NULL; + if (numMainEventQueues > 1) { + if (simQuantum == 0) { + fatal("Quantum for multi-eventq simulation not specified"); + } + + quantum_event = new GlobalSyncEvent(curTick() + simQuantum, simQuantum, + EventBase::Progress_Event_Pri, 0); + + inParallelMode = true; + } + + // all subordinate (created) threads should be waiting on the + // barrier; the arrival of the main thread here will satisfy the + // barrier, and all threads will enter doSimLoop in parallel + threadBarrier->wait(); + Event *local_event = doSimLoop(mainEventQueue[0]); + assert(local_event != NULL); + + inParallelMode = false; + + // locate the global exit event and return it to Python + BaseGlobalEvent *global_event = local_event->globalEvent(); + assert(global_event != NULL); + + GlobalSimLoopExitEvent *global_exit_event = + dynamic_cast<GlobalSimLoopExitEvent *>(global_event); + assert(global_exit_event != NULL); + + // if we didn't hit limit_event, delete it. + if (global_exit_event != limit_event) { + assert(limit_event->scheduled()); + limit_event->deschedule(); + delete limit_event; + } + + //! Delete the simulation quantum event. + if (quantum_event != NULL) { + quantum_event->deschedule(); + delete quantum_event; + } + + return global_exit_event; +} + +/** + * Test and clear the global async_event flag, such that each time the + * flag is cleared, only one thread returns true (and thus is assigned + * to handle the corresponding async event(s)). 
+ */ +static bool +testAndClearAsyncEvent() +{ + bool was_set = false; + asyncEventMutex.lock(); + + if (async_event) { + was_set = true; + async_event = false; + } + + asyncEventMutex.unlock(); + return was_set; +} + +/** + * The main per-thread simulation loop. This loop is executed by all + * simulation threads (the main thread and the subordinate threads) in + * parallel. + */ +Event * +doSimLoop(EventQueue *eventq) +{ + // set the per thread current eventq pointer + curEventQueue(eventq); + eventq->handleAsyncInsertions(); while (1) { // there should always be at least one event (the SimLoopExitEvent // we just scheduled) in the queue - assert(!mainEventQueue.empty()); - assert(curTick() <= mainEventQueue.nextTick() && + assert(!eventq->empty()); + assert(curTick() <= eventq->nextTick() && "event scheduled in the past"); - Event *exit_event = mainEventQueue.serviceOne(); + Event *exit_event = eventq->serviceOne(); if (exit_event != NULL) { - // hit some kind of exit event; return to Python - // event must be subclass of SimLoopExitEvent... - SimLoopExitEvent *se_event; - se_event = dynamic_cast(exit_event); - - if (se_event == NULL) - panic("Bogus exit event class!"); - - // if we didn't hit limit_event, delete it - if (se_event != limit_event) { - assert(limit_event->scheduled()); - limit_event->squash(); - hack_once("be nice to actually delete the event here"); - } - - return se_event; + return exit_event; } - if (async_event) { + if (async_event && testAndClearAsyncEvent()) { async_event = false; if (async_statdump || async_statreset) { Stats::schedStatEvent(async_statdump, async_statreset); @@ -113,4 +224,3 @@ // not reached... 
only exit is return on SimLoopExitEvent } - diff -r 43d22d746e7a -r 5985e0c89872 src/sim/stat_control.cc --- a/src/sim/stat_control.cc Mon Aug 19 03:52:36 2013 -0400 +++ b/src/sim/stat_control.cc Mon Aug 19 19:06:26 2013 -0500 @@ -59,7 +59,7 @@ #include "cpu/base.hh" #endif -#include "sim/eventq.hh" +#include "sim/global_event.hh" #include "sim/stat_control.hh" using namespace std; @@ -74,7 +74,7 @@ Time statTime(true); Tick startTick; -Event *dumpEvent; +GlobalEvent *dumpEvent; struct SimTicksReset : public Callback { @@ -216,7 +216,7 @@ /** * Event to dump and/or reset the statistics. */ -class StatEvent : public Event +class StatEvent : public GlobalEvent { private: bool dump; @@ -224,8 +224,8 @@ Tick repeat; public: - StatEvent(bool _dump, bool _reset, Tick _repeat) - : Event(Stat_Event_Pri, AutoDelete), + StatEvent(Tick _when, bool _dump, bool _reset, Tick _repeat) + : GlobalEvent(_when, Stat_Event_Pri, 0), dump(_dump), reset(_reset), repeat(_repeat) { } @@ -243,13 +243,14 @@ Stats::schedStatEvent(dump, reset, curTick() + repeat, repeat); } } + + const char *description() const { return "GlobalStatEvent"; } }; void schedStatEvent(bool dump, bool reset, Tick when, Tick repeat) { - dumpEvent = new StatEvent(dump, reset, repeat); - mainEventQueue.schedule(dumpEvent, when); + dumpEvent = new StatEvent(when + simQuantum, dump, reset, repeat); } void @@ -264,7 +265,7 @@ */ if (dumpEvent != NULL && (period == 0 || dumpEvent->scheduled())) { // Event should AutoDelete, so we do not need to free it. - mainEventQueue.deschedule(dumpEvent); + dumpEvent->deschedule(); } /* @@ -294,7 +295,7 @@ (dumpEvent->scheduled() && dumpEvent->when() < curTick())) { // shift by curTick() and reschedule Tick _when = dumpEvent->when(); - mainEventQueue.reschedule(dumpEvent, _when + curTick()); + dumpEvent->reschedule(_when + curTick()); } }